Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/AliasSetTracker.cpp | 2
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 2
-rw-r--r--  llvm/lib/Analysis/BranchProbabilityInfo.cpp | 2
-rw-r--r--  llvm/lib/Analysis/CFG.cpp | 2
-rw-r--r--  llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp | 2
-rw-r--r--  llvm/lib/Analysis/CFLGraph.h | 3
-rw-r--r--  llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp | 2
-rw-r--r--  llvm/lib/Analysis/ConstraintSystem.cpp | 2
-rw-r--r--  llvm/lib/Analysis/CostModel.cpp | 26
-rw-r--r--  llvm/lib/Analysis/DDG.cpp | 4
-rw-r--r--  llvm/lib/Analysis/Delinearization.cpp | 2
-rw-r--r--  llvm/lib/Analysis/DivergenceAnalysis.cpp | 8
-rw-r--r--  llvm/lib/Analysis/GlobalsModRef.cpp | 4
-rw-r--r--  llvm/lib/Analysis/IVDescriptors.cpp | 16
-rw-r--r--  llvm/lib/Analysis/IVUsers.cpp | 2
-rw-r--r--  llvm/lib/Analysis/InstructionPrecedenceTracking.cpp | 4
-rw-r--r--  llvm/lib/Analysis/LazyValueInfo.cpp | 6
-rw-r--r--  llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp | 4
-rw-r--r--  llvm/lib/Analysis/Lint.cpp | 2
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp | 184
-rw-r--r--  llvm/lib/Analysis/LoopCacheAnalysis.cpp | 29
-rw-r--r--  llvm/lib/Analysis/LoopInfo.cpp | 19
-rw-r--r--  llvm/lib/Analysis/MemoryBuiltins.cpp | 125
-rw-r--r--  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 10
-rw-r--r--  llvm/lib/Analysis/MemoryProfileInfo.cpp | 226
-rw-r--r--  llvm/lib/Analysis/MemorySSAUpdater.cpp | 16
-rw-r--r--  llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp | 2
-rw-r--r--  llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 32
-rw-r--r--  llvm/lib/Analysis/MustExecute.cpp | 18
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 139
-rw-r--r--  llvm/lib/Analysis/StackLifetime.cpp | 2
-rw-r--r--  llvm/lib/Analysis/StackSafetyAnalysis.cpp | 20
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp | 10
-rw-r--r--  llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 5
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 12
-rw-r--r--  llvm/lib/Analysis/VectorUtils.cpp | 4
-rw-r--r--  llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp | 3
-rw-r--r--  llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 55
-rw-r--r--  llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 27
-rw-r--r--  llvm/lib/Bitcode/Reader/ValueList.h | 1
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 5
-rw-r--r--  llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 55
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/BasicBlockSections.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/CalcSpillWeights.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 48
-rw-r--r--  llvm/lib/CodeGen/DFAPacketizer.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/EarlyIfConversion.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/ExpandVectorPredication.cpp | 87
-rw-r--r--  llvm/lib/CodeGen/FaultMaps.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 20
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 89
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 30
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/HardwareLoops.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/ImplicitNullChecks.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/InlineSpiller.cpp | 22
-rw-r--r--  llvm/lib/CodeGen/InterleavedAccessPass.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 49
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/LiveDebugVariables.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/LiveIntervals.cpp | 13
-rw-r--r--  llvm/lib/CodeGen/LiveRangeEdit.cpp | 23
-rw-r--r--  llvm/lib/CodeGen/LiveVariables.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/LowerEmuTLS.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIParser.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/MachineBasicBlock.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineBlockPlacement.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/MachineCSE.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineCombiner.cpp | 49
-rw-r--r--  llvm/lib/CodeGen/MachineFrameInfo.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineFunction.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineInstr.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/MachineLICM.cpp | 17
-rw-r--r--  llvm/lib/CodeGen/MachinePipeliner.cpp | 24
-rw-r--r--  llvm/lib/CodeGen/MachineScheduler.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineSink.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/MachineStableHash.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MachineTraceMetrics.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/MachineVerifier.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/RDFGraph.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/RDFLiveness.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegAllocBase.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegAllocBasic.cpp | 1
-rw-r--r--  llvm/lib/CodeGen/RegAllocFast.cpp | 7
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.cpp | 16
-rw-r--r--  llvm/lib/CodeGen/RegAllocGreedy.h | 3
-rw-r--r--  llvm/lib/CodeGen/RegAllocPBQP.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/RegAllocScore.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/RegAllocScore.h | 4
-rw-r--r--  llvm/lib/CodeGen/RegisterCoalescer.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/RegisterPressure.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/SafeStack.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 8
-rw-r--r--  llvm/lib/CodeGen/SelectOptimize.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 327
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 18
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 140
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 5
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 24
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 131
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 128
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 95
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 42
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 114
-rw-r--r--  llvm/lib/CodeGen/SlotIndexes.cpp | 11
-rw-r--r--  llvm/lib/CodeGen/SplitKit.cpp | 14
-rw-r--r--  llvm/lib/CodeGen/SplitKit.h | 7
-rw-r--r--  llvm/lib/CodeGen/StackMaps.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SwiftErrorValueTracking.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/TailDuplicator.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/TargetInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/TypePromotion.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/VLIWMachineScheduler.cpp | 2
-rw-r--r--  llvm/lib/DWARFLinker/DWARFLinker.cpp | 64
-rw-r--r--  llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp | 6
-rw-r--r--  llvm/lib/DWARFLinker/DWARFStreamer.cpp | 20
-rw-r--r--  llvm/lib/DWP/DWP.cpp | 19
-rw-r--r--  llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp | 6
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 2
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 4
-rw-r--r--  llvm/lib/DebugInfo/Symbolize/Markup.cpp | 3
-rw-r--r--  llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp | 389
-rw-r--r--  llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp | 2
-rw-r--r--  llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp | 2
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp | 4
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp | 135
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp | 254
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp | 15
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp | 341
-rw-r--r--  llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp | 3
-rw-r--r--  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 31
-rw-r--r--  llvm/lib/IR/Function.cpp | 2
-rw-r--r--  llvm/lib/IR/GCStrategy.cpp | 5
-rw-r--r--  llvm/lib/IR/Globals.cpp | 37
-rw-r--r--  llvm/lib/IR/InlineAsm.cpp | 20
-rw-r--r--  llvm/lib/IR/Instructions.cpp | 13
-rw-r--r--  llvm/lib/IR/IntrinsicInst.cpp | 22
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 110
-rw-r--r--  llvm/lib/Linker/IRMover.cpp | 32
-rw-r--r--  llvm/lib/MC/ELFObjectWriter.cpp | 82
-rw-r--r--  llvm/lib/MC/MCContext.cpp | 18
-rw-r--r--  llvm/lib/MC/MCMachOStreamer.cpp | 18
-rw-r--r--  llvm/lib/MC/MCParser/MasmParser.cpp | 33
-rw-r--r--  llvm/lib/MC/MCPseudoProbe.cpp | 3
-rw-r--r--  llvm/lib/MC/MachObjectWriter.cpp | 25
-rw-r--r--  llvm/lib/MC/WinCOFFObjectWriter.cpp | 1
-rw-r--r--  llvm/lib/MC/XCOFFObjectWriter.cpp | 43
-rw-r--r--  llvm/lib/ObjCopy/ELF/ELFObject.cpp | 3
-rw-r--r--  llvm/lib/Object/Archive.cpp | 61
-rw-r--r--  llvm/lib/Object/Decompressor.cpp | 39
-rw-r--r--  llvm/lib/Object/WasmObjectFile.cpp | 2
-rw-r--r--  llvm/lib/Passes/PassBuilderPipelines.cpp | 2
-rw-r--r--  llvm/lib/Passes/StandardInstrumentations.cpp | 84
-rw-r--r--  llvm/lib/ProfileData/Coverage/CoverageMapping.cpp | 12
-rw-r--r--  llvm/lib/ProfileData/InstrProfReader.cpp | 38
-rw-r--r--  llvm/lib/Support/AddressRanges.cpp | 57
-rw-r--r--  llvm/lib/Support/CommandLine.cpp | 2
-rw-r--r--  llvm/lib/Support/Compression.cpp | 65
-rw-r--r--  llvm/lib/Support/DivisionByConstantInfo.cpp | 8
-rw-r--r--  llvm/lib/Support/RISCVISAInfo.cpp | 2
-rw-r--r--  llvm/lib/TableGen/JSONBackend.cpp | 4
-rw-r--r--  llvm/lib/TableGen/Record.cpp | 8
-rw-r--r--  llvm/lib/TableGen/TGParser.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 55
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 119
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h | 9
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td | 25
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp | 4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 36
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 24
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 86
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 10
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 65
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td | 17
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 67
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributes.def | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 19
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 76
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 166
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 7
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 23
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 45
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 16
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 47
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td | 16
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 50
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 102
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 91
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 17
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNProcessors.td | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 7
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 323
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 40
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrFormats.td | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 23
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td | 110
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 11
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 6
-rw-r--r--  llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp | 309
-rw-r--r--  llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 82
-rw-r--r--  llvm/lib/Target/AMDGPU/SIProgramInfo.h | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SMInstructions.td | 119
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td | 78
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td | 100
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 38
-rw-r--r--  llvm/lib/Target/AMDGPU/VOPCInstructions.td | 26
-rw-r--r--  llvm/lib/Target/AMDGPU/VOPInstructions.td | 42
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 3
-rw-r--r--  llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 15
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 6
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 42
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td | 6
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrThumb.td | 8
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 14
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 8
-rw-r--r--  llvm/lib/Target/AVR/AVRSubtarget.h | 14
-rw-r--r--  llvm/lib/Target/CSKY/CSKYInstrInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 8
-rw-r--r--  llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp | 1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 5
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 4
-rw-r--r--  llvm/lib/Target/Mips/MipsISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 128
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 100
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 95
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 1
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXUtilities.h | 10
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 25
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.h | 3
-rw-r--r--  llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 13
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.h | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCV.td | 26
-rw-r--r--  llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 169
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 27
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 376
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 112
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.h | 13
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td | 7
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoM.td | 22
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSubtarget.h | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 7
-rw-r--r--  llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5
-rw-r--r--  llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp | 10
-rw-r--r--  llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h | 13
-rw-r--r--  llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp | 15
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRV.h | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 164
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 201
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVCallLowering.h | 6
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp | 2
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h | 4
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 61
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 425
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 53
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp | 14
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.h | 1
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 24
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 252
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp | 7
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 168
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h | 8
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 75
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp | 288
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp | 3
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 5
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 31
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVUtils.h | 6
-rw-r--r--  llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 8
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZCallingConv.cpp | 4
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZCallingConv.h | 38
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZCallingConv.td | 34
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 101
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 2
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 8
-rw-r--r--  llvm/lib/Target/VE/VEInstrInfo.cpp | 40
-rw-r--r--  llvm/lib/Target/VE/VEInstrPatternsVec.td | 14
-rw-r--r--  llvm/lib/Target/VE/VEInstrVec.td | 27
-rw-r--r--  llvm/lib/Target/VE/VERegisterInfo.cpp | 201
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h | 3
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 23
-rw-r--r--  llvm/lib/Target/X86/X86.td | 2
-rw-r--r--  llvm/lib/Target/X86/X86FixupBWInsts.cpp | 12
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 561
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td | 48
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp | 12
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.h | 3
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td | 70
-rw-r--r--  llvm/lib/Target/X86/X86TargetMachine.cpp | 8
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 65
-rw-r--r--  llvm/lib/Target/XCore/XCoreFrameLowering.cpp | 2
-rw-r--r--  llvm/lib/ToolDrivers/llvm-lib/Options.td | 1
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroCleanup.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroInternal.h | 2
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 19
-rw-r--r--  llvm/lib/Transforms/IPO/Attributor.cpp | 278
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 2074
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionImport.cpp | 45
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalOpt.cpp | 2
-rw-r--r--  llvm/lib/Transforms/IPO/IPO.cpp | 4
-rw-r--r--  llvm/lib/Transforms/IPO/Internalize.cpp | 36
-rw-r--r--  llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 42
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2
-rw-r--r--  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 106
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfile.cpp | 102
-rw-r--r--  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 70
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 12
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 45
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 55
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 57
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 18
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 52
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Instrumentation/CGProfile.cpp | 36
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp | 53
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp | 31
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 44
-rw-r--r--  llvm/lib/Transforms/Instrumentation/Instrumentation.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 16
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 42
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 64
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 73
-rw-r--r--  llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 1
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopPredication.cpp | 29
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 93
-rw-r--r--  llvm/lib/Transforms/Scalar/Reassociate.cpp | 58
-rw-r--r--  llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 14
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalar.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 59
-rw-r--r--  llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 13
-rw-r--r--  llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 228
-rw-r--r--  llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp | 28
-rw-r--r--  llvm/lib/Transforms/Utils/CloneFunction.cpp | 19
-rw-r--r--  llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp | 59
-rw-r--r--  llvm/lib/Transforms/Utils/Evaluator.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Utils/InlineFunction.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 27
-rw-r--r--  llvm/lib/Transforms/Utils/LoopRotationUtils.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/LoopSimplify.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUtils.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Utils/LowerAtomic.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/NameAnonGlobals.cpp | 31
-rw-r--r--  llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 19
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 105
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 11
-rw-r--r--  llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 46
-rw-r--r--  llvm/lib/Transforms/Utils/Utils.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 245
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 11
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 46
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 66
-rw-r--r--  llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2
419 files changed, 11599 insertions, 5293 deletions
diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp
index 234a73bff6a8..bb25244a88dd 100644
--- a/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -579,7 +579,7 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() {
AliasAnyAS->Access = AliasSet::ModRefAccess;
AliasAnyAS->AliasAny = true;
- for (auto Cur : ASVector) {
+ for (auto *Cur : ASVector) {
// If Cur was already forwarding, just forward to the new AS instead.
AliasSet *FwdTo = Cur->Forward;
if (FwdTo) {
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index c78f822b8bcf..c3b032abcba2 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1764,7 +1764,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
// phi nodes could be involved in.
- for (auto *P : VisitedPhiBBs)
+ for (const auto *P : VisitedPhiBBs)
if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT))
return false;
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 428ae8975c30..f45728768fcd 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1250,7 +1250,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI,
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
- for (auto BB : post_order(&F.getEntryBlock())) {
+ for (const auto *BB : post_order(&F.getEntryBlock())) {
LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
<< "\n");
// If there is no at least two successors, no sense to set probability.
diff --git a/llvm/lib/Analysis/CFG.cpp b/llvm/lib/Analysis/CFG.cpp
index 1902d72f2f89..e5dd45842d6a 100644
--- a/llvm/lib/Analysis/CFG.cpp
+++ b/llvm/lib/Analysis/CFG.cpp
@@ -149,7 +149,7 @@ bool llvm::isPotentiallyReachableFromMany(
// untrue.
SmallPtrSet<const Loop *, 8> LoopsWithHoles;
if (LI && ExclusionSet) {
- for (auto BB : *ExclusionSet) {
+ for (auto *BB : *ExclusionSet) {
if (const Loop *L = getOutermostLoop(LI, BB))
LoopsWithHoles.insert(L);
}
diff --git a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 602a01867f3b..d70e1b21d768 100644
--- a/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -615,7 +615,7 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList,
auto Src = InstantiatedValue{Val, I};
// If there's an assignment edge from X to Y, it means Y is reachable from
// X at S3 and X is reachable from Y at S1
- for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
+ for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
WorkList);
propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
diff --git a/llvm/lib/Analysis/CFLGraph.h b/llvm/lib/Analysis/CFLGraph.h
index 60fc8d18678c..47bb02ac8e8b 100644
--- a/llvm/lib/Analysis/CFLGraph.h
+++ b/llvm/lib/Analysis/CFLGraph.h
@@ -434,7 +434,8 @@ template <typename CFLAA> class CFLGraphBuilder {
// introduce any aliases.
// TODO: address other common library functions such as realloc(),
// strdup(), etc.
- if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI))
+ if (isMallocOrCallocLikeFn(&Call, &TLI) ||
+ getFreedOperand(&Call, &TLI) != nullptr)
return;
// TODO: Add support for noalias args/all the other fun function
diff --git a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index f92869c2ec63..33ed6f88f82b 100644
--- a/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -216,7 +216,7 @@ CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
auto Src = InstantiatedValue{Val, I};
- for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
+ for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
SetBuilder.addWith(Src, Edge.Other);
}
}
diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp
index dc774728ab3d..2efa474f3552 100644
--- a/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -110,7 +110,7 @@ void ConstraintSystem::dump(ArrayRef<std::string> Names) const {
if (Constraints.empty())
return;
- for (auto &Row : Constraints) {
+ for (const auto &Row : Constraints) {
SmallVector<std::string, 16> Parts;
for (unsigned I = 1, S = Row.size(); I < S; ++I) {
if (Row[I] == 0)
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 52e424ae324b..3c162f604cd5 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -25,6 +25,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;
static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
@@ -39,6 +40,9 @@ static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
clEnumValN(TargetTransformInfo::TCK_SizeAndLatency,
"size-latency", "Code size and latency")));
+static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost",
+ cl::desc("Calculate intrinsics cost based only on argument types"),
+ cl::init(false));
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
@@ -103,7 +107,16 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
for (BasicBlock &B : *F) {
for (Instruction &Inst : B) {
- InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind);
+ InstructionCost Cost;
+ if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) {
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
+ InstructionCost::getInvalid(), true);
+ Cost = TTI->getIntrinsicInstrCost(ICA, CostKind);
+ }
+ else {
+ Cost = TTI->getInstructionCost(&Inst, CostKind);
+ }
if (auto CostVal = Cost.getValue())
OS << "Cost Model: Found an estimated cost of " << *CostVal;
else
@@ -122,7 +135,16 @@ PreservedAnalyses CostModelPrinterPass::run(Function &F,
for (Instruction &Inst : B) {
// TODO: Use a pass parameter instead of cl::opt CostKind to determine
// which cost kind to print.
- InstructionCost Cost = TTI.getInstructionCost(&Inst, CostKind);
+ InstructionCost Cost;
+ if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) {
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II,
+ InstructionCost::getInvalid(), true);
+ Cost = TTI.getIntrinsicInstrCost(ICA, CostKind);
+ }
+ else {
+ Cost = TTI.getInstructionCost(&Inst, CostKind);
+ }
if (auto CostVal = Cost.getValue())
OS << "Cost Model: Found an estimated cost of " << *CostVal;
else
diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp
index 998c888dd2d9..da64ef153960 100644
--- a/llvm/lib/Analysis/DDG.cpp
+++ b/llvm/lib/Analysis/DDG.cpp
@@ -95,7 +95,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) {
llvm_unreachable("unimplemented type of node");
OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n");
- for (auto &E : N.getEdges())
+ for (const auto &E : N.getEdges())
OS.indent(2) << *E;
return OS;
}
@@ -188,7 +188,7 @@ DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
// Put the basic blocks in program order for correct dependence
// directions.
BasicBlockListType BBList;
- for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
+ for (const auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
append_range(BBList, SCC);
std::reverse(BBList.begin(), BBList.end());
DDGBuilder(*this, D, BBList).populate();
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index c36e1d922915..7ab91b9eaea4 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -133,7 +133,7 @@ struct SCEVCollectAddRecMultiplies {
if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
bool HasAddRec = false;
SmallVector<const SCEV *, 0> Operands;
- for (auto Op : Mul->operands()) {
+ for (const auto *Op : Mul->operands()) {
const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
if (Unknown && !isa<CallInst>(Unknown->getValue())) {
Operands.push_back(Op);
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 1a4b09e0cac2..02c40d2640c1 100644
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -220,19 +220,19 @@ void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
// phi nodes at the fringes of the dominance region
if (!DT.dominates(&LoopHeader, UserBlock)) {
// all PHI nodes of UserBlock become divergent
- for (auto &Phi : UserBlock->phis()) {
+ for (const auto &Phi : UserBlock->phis()) {
analyzeTemporalDivergence(Phi, OuterDivLoop);
}
continue;
}
// Taint outside users of values carried by OuterDivLoop.
- for (auto &I : *UserBlock) {
+ for (const auto &I : *UserBlock) {
analyzeTemporalDivergence(I, OuterDivLoop);
}
// visit all blocks in the dominance region
- for (auto *SuccBlock : successors(UserBlock)) {
+ for (const auto *SuccBlock : successors(UserBlock)) {
if (!Visited.insert(SuccBlock).second) {
continue;
}
@@ -399,7 +399,7 @@ DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
}
for (const BasicBlock &BB : F) {
OS << "\n " << BB.getName() << ":\n";
- for (auto &I : BB.instructionsWithoutDebug()) {
+ for (const auto &I : BB.instructionsWithoutDebug()) {
OS << (DI.isDivergent(I) ? "DIVERGENT: " : " ");
OS << I << "\n";
}
diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp
index db6eae0d962a..e6ef1c78a628 100644
--- a/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -361,7 +361,7 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
if (Call->isDataOperand(&U)) {
// Detect calls to free.
if (Call->isArgOperand(&U) &&
- isFreeCall(I, &GetTLI(*Call->getFunction()))) {
+ getFreedOperand(Call, &GetTLI(*Call->getFunction())) == U) {
if (Writers)
Writers->insert(Call->getParent()->getParent());
} else {
@@ -906,7 +906,7 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
- for (auto &A : Call->args()) {
+ for (const auto &A : Call->args()) {
SmallVector<const Value*, 4> Objects;
getUnderlyingObjects(A, Objects);
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index e4d706ab045c..a51e974003f6 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -63,20 +63,6 @@ bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {
return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
}
-bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
- switch (Kind) {
- default:
- break;
- case RecurKind::Add:
- case RecurKind::Mul:
- case RecurKind::FAdd:
- case RecurKind::FMul:
- case RecurKind::FMulAdd:
- return true;
- }
- return false;
-}
-
/// Determines if Phi may have been type-promoted. If Phi has a single user
/// that ANDs the Phi with a type mask, return the user. RT is updated to
/// account for the narrower bit width represented by the mask, and the AND
@@ -1170,7 +1156,7 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
ExpectedUses = 2;
auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
- for (auto User : Cur->users()) {
+ for (auto *User : Cur->users()) {
Instruction *UI = cast<Instruction>(User);
if (isa<PHINode>(UI))
continue;
diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp
index 5bde947bd851..830211658353 100644
--- a/llvm/lib/Analysis/IVUsers.cpp
+++ b/llvm/lib/Analysis/IVUsers.cpp
@@ -274,7 +274,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << " ";
IVUse.getOperandValToReplace()->printAsOperand(OS, false);
OS << " = " << *getReplacementExpr(IVUse);
- for (auto PostIncLoop : IVUse.PostIncLoops) {
+ for (const auto *PostIncLoop : IVUse.PostIncLoops) {
OS << " (post-inc with loop ";
PostIncLoop->getHeader()->printAsOperand(OS, false);
OS << ")";
diff --git a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
index 9fee57c54b85..78e7f456ebc6 100644
--- a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -68,7 +68,7 @@ bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction(
void InstructionPrecedenceTracking::fill(const BasicBlock *BB) {
FirstSpecialInsts.erase(BB);
- for (auto &I : *BB) {
+ for (const auto &I : *BB) {
NumInstScanned++;
if (isSpecialInstruction(&I)) {
FirstSpecialInsts[BB] = &I;
@@ -101,7 +101,7 @@ void InstructionPrecedenceTracking::validate(const BasicBlock *BB) const {
void InstructionPrecedenceTracking::validateAll() const {
// Check that for every known block the cached value is correct.
- for (auto &It : FirstSpecialInsts)
+ for (const auto &It : FirstSpecialInsts)
validate(It.first);
}
#endif
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index d49b20798c82..2fae260e0d8f 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1896,7 +1896,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot(
const BasicBlock *BB, formatted_raw_ostream &OS) {
// Find if there are latticevalues defined for arguments of the function.
auto *F = BB->getParent();
- for (auto &Arg : F->args()) {
+ for (const auto &Arg : F->args()) {
ValueLatticeElement Result = LVIImpl->getValueInBlock(
const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB));
if (Result.isUnknown())
@@ -1932,12 +1932,12 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot(
printResult(ParentBB);
// Print the LVI analysis results for the immediate successor blocks, that
// are dominated by `ParentBB`.
- for (auto *BBSucc : successors(ParentBB))
+ for (const auto *BBSucc : successors(ParentBB))
if (DT.dominates(ParentBB, BBSucc))
printResult(BBSucc);
// Print LVI in blocks where `I` is used.
- for (auto *U : I->users())
+ for (const auto *U : I->users())
if (auto *UseI = dyn_cast<Instruction>(U))
if (!isa<PHINode>(UseI) || DT.dominates(ParentBB, UseI->getParent()))
printResult(UseI->getParent());
diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 491d44335f22..381d62a37662 100644
--- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -393,14 +393,14 @@ void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
return;
// Dumps all divergent values in F, arguments and then instructions.
- for (auto &Arg : F->args()) {
+ for (const auto &Arg : F->args()) {
OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
OS << Arg << "\n";
}
// Iterate instructions using instructions() to ensure a deterministic order.
for (const BasicBlock &BB : *F) {
OS << "\n " << BB.getName() << ":\n";
- for (auto &I : BB.instructionsWithoutDebug()) {
+ for (const auto &I : BB.instructionsWithoutDebug()) {
OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
OS << I << "\n";
}
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 9cfb91a22b7d..8b0f2a8ed99b 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -229,7 +229,7 @@ void Lint::visitCallBase(CallBase &I) {
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
AttributeList PAL = I.getAttributes();
unsigned ArgNo = 0;
- for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
+ for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
// Skip ByVal arguments since they will be memcpy'd to the callee's
// stack so we're not really passing the pointer anyway.
if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 79161db9b5e4..bed684b7652a 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -130,6 +130,11 @@ static cl::opt<bool> EnableForwardingConflictDetection(
cl::desc("Enable conflict detection in loop-access analysis"),
cl::init(true));
+static cl::opt<unsigned> MaxForkedSCEVDepth(
+ "max-forked-scev-depth", cl::Hidden,
+ cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
+ cl::init(5));
+
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
@@ -288,8 +293,10 @@ void RuntimePointerChecking::tryToCreateDiffCheck(
DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
Type *SrcTy = getLoadStoreType(SrcInsts[0]);
Type *DstTy = getLoadStoreType(SinkInsts[0]);
- if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
+ if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) {
+ CanUseDiffCheck = false;
return;
+ }
unsigned AllocSize =
std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
IntegerType *IntTy =
@@ -778,6 +785,140 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
}
}
+// Walk back through the IR for a pointer, looking for a select like the
+// following:
+//
+// %offset = select i1 %cmp, i64 %a, i64 %b
+// %addr = getelementptr double, double* %base, i64 %offset
+// %ld = load double, double* %addr, align 8
+//
+// We won't be able to form a single SCEVAddRecExpr from this since the
+// address for each loop iteration depends on %cmp. We could potentially
+// produce multiple valid SCEVAddRecExprs, though, and check all of them for
+// memory safety/aliasing if needed.
+//
+// If we encounter some IR we don't yet handle, or something obviously fine
+// like a constant, then we just add the SCEV for that term to the list passed
+// in by the caller. If we have a node that may potentially yield a valid
+// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
+// ourselves before adding to the list.
+static void
+findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
+ SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList,
+ unsigned Depth) {
+ // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
+ // we've exceeded our limit on recursion, just return whatever we have
+ // regardless of whether it can be used for a forked pointer or not, along
+ // with an indication of whether it might be a poison or undef value.
+ const SCEV *Scev = SE->getSCEV(Ptr);
+ if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
+ !isa<Instruction>(Ptr) || Depth == 0) {
+ ScevList.push_back(
+ std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+ return;
+ }
+
+ Depth--;
+
+ auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool {
+ return S.second;
+ };
+
+ Instruction *I = cast<Instruction>(Ptr);
+ unsigned Opcode = I->getOpcode();
+ switch (Opcode) {
+ case Instruction::GetElementPtr: {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+ Type *SourceTy = GEP->getSourceElementType();
+ // We only handle base + single offset GEPs here for now.
+ // Not dealing with preexisting gathers yet, so no vectors.
+ if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
+ ScevList.push_back(
+ std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP)));
+ break;
+ }
+ SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs;
+ SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs;
+ findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
+ findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
+
+ // See if we need to freeze our fork...
+ bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
+ any_of(OffsetScevs, UndefPoisonCheck);
+
+ // Check that we only have a single fork, on either the base or the offset.
+ // Copy the SCEV across for the one without a fork in order to generate
+ // the full SCEV for both sides of the GEP.
+ if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
+ BaseScevs.push_back(BaseScevs[0]);
+ else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
+ OffsetScevs.push_back(OffsetScevs[0]);
+ else {
+ ScevList.push_back(std::make_pair(Scev, NeedsFreeze));
+ break;
+ }
+
+ // Find the pointer type we need to extend to.
+ Type *IntPtrTy = SE->getEffectiveSCEVType(
+ SE->getSCEV(GEP->getPointerOperand())->getType());
+
+ // Find the size of the type being pointed to. We only have a single
+ // index term (guarded above) so we don't need to index into arrays or
+ // structures, just get the size of the scalar value.
+ const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
+
+ // Scale up the offsets by the size of the type, then add to the bases.
+ const SCEV *Scaled1 = SE->getMulExpr(
+ Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy));
+ const SCEV *Scaled2 = SE->getMulExpr(
+ Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy));
+ ScevList.push_back(std::make_pair(
+ SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze));
+ ScevList.push_back(std::make_pair(
+ SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze));
+ break;
+ }
+ case Instruction::Select: {
+ SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs;
+ // A select means we've found a forked pointer, but we currently only
+ // support a single select per pointer so if there's another behind this
+ // then we just bail out and return the generic SCEV.
+ findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
+ findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
+ if (ChildScevs.size() == 2) {
+ ScevList.push_back(ChildScevs[0]);
+ ScevList.push_back(ChildScevs[1]);
+ } else
+ ScevList.push_back(
+ std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+ break;
+ }
+ default:
+ // Just return the current SCEV if we haven't handled the instruction yet.
+ LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
+ ScevList.push_back(
+ std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)));
+ break;
+ }
+}
+
+static SmallVector<std::pair<const SCEV *, bool>>
+findForkedPointer(PredicatedScalarEvolution &PSE,
+ const ValueToValueMap &StridesMap, Value *Ptr,
+ const Loop *L) {
+ ScalarEvolution *SE = PSE.getSE();
+ assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
+ SmallVector<std::pair<const SCEV *, bool>> Scevs;
+ findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
+
+ // For now, we will only accept a forked pointer with two possible SCEVs.
+ if (Scevs.size() == 2)
+ return Scevs;
+
+ return {
+ std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
+}
+
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access, Type *AccessTy,
const ValueToValueMap &StridesMap,
@@ -787,19 +928,8 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
bool Assume) {
Value *Ptr = Access.getPointer();
- ScalarEvolution &SE = *PSE.getSE();
- SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs;
- auto *SI = dyn_cast<SelectInst>(Ptr);
- // Look through selects in the current loop.
- if (SI && !TheLoop->isLoopInvariant(SI)) {
- TranslatedPtrs = {
- std::make_pair(SE.getSCEV(SI->getOperand(1)),
- !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))),
- std::make_pair(SE.getSCEV(SI->getOperand(2)),
- !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))};
- } else
- TranslatedPtrs = {
- std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)};
+ SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs =
+ findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
for (auto &P : TranslatedPtrs) {
const SCEV *PtrExpr = P.first;
@@ -879,7 +1009,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
unsigned RunningDepId = 1;
DenseMap<Value *, unsigned> DepSetId;
- SmallVector<MemAccessInfo, 4> Retries;
+ SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;
// First, count how many write and read accesses are in the alias set. Also
// collect MemAccessInfos for later.
@@ -911,13 +1041,13 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
}
for (auto &Access : AccessInfos) {
- for (auto &AccessTy : Accesses[Access]) {
+ for (const auto &AccessTy : Accesses[Access]) {
if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
DepSetId, TheLoop, RunningDepId, ASId,
ShouldCheckWrap, false)) {
LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
<< *Access.getPointer() << '\n');
- Retries.push_back(Access);
+ Retries.push_back({Access, AccessTy});
CanDoAliasSetRT = false;
}
}
@@ -941,15 +1071,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
// We know that we need these checks, so we can now be more aggressive
// and add further checks if required (overflow checks).
CanDoAliasSetRT = true;
- for (auto Access : Retries) {
- for (auto &AccessTy : Accesses[Access]) {
- if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
- DepSetId, TheLoop, RunningDepId, ASId,
- ShouldCheckWrap, /*Assume=*/true)) {
- CanDoAliasSetRT = false;
- UncomputablePtr = Access.getPointer();
- break;
- }
+ for (auto Retry : Retries) {
+ MemAccessInfo Access = Retry.first;
+ Type *AccessTy = Retry.second;
+ if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
+ DepSetId, TheLoop, RunningDepId, ASId,
+ ShouldCheckWrap, /*Assume=*/true)) {
+ CanDoAliasSetRT = false;
+ UncomputablePtr = Access.getPointer();
+ break;
}
}
}
@@ -2461,7 +2591,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (auto *Dependences = DepChecker->getDependences()) {
OS.indent(Depth) << "Dependences:\n";
- for (auto &Dep : *Dependences) {
+ for (const auto &Dep : *Dependences) {
Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
OS << "\n";
}
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 2cbf1f7f2d28..85f2dad86711 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -289,18 +289,14 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
const SCEV *RefCost = nullptr;
- if (isConsecutive(L, CLS)) {
+ const SCEV *Stride = nullptr;
+ if (isConsecutive(L, Stride, CLS)) {
// If the indexed reference is 'consecutive' the cost is
// (TripCount*Stride)/CLS.
- const SCEV *Coeff = getLastCoefficient();
- const SCEV *ElemSize = Sizes.back();
- assert(Coeff->getType() == ElemSize->getType() &&
- "Expecting the same type");
- const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ assert(Stride != nullptr &&
+ "Stride should not be null for consecutive access!");
Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
- if (SE.isKnownNegative(Stride))
- Stride = SE.getNegativeSCEV(Stride);
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
@@ -464,7 +460,8 @@ bool IndexedReference::isLoopInvariant(const Loop &L) const {
return allCoeffForLoopAreZero;
}
-bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
+bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride,
+ unsigned CLS) const {
// The indexed reference is 'consecutive' if the only coefficient that uses
// the loop induction variable is the last one...
const SCEV *LastSubscript = Subscripts.back();
@@ -478,7 +475,19 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
// ...and the access stride is less than the cache line size.
const SCEV *Coeff = getLastCoefficient();
const SCEV *ElemSize = Sizes.back();
- const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType());
+ // FIXME: This assumes that all values are signed integers which may
+ // be incorrect in unusual codes and incorrectly use sext instead of zext.
+ // for (uint32_t i = 0; i < 512; ++i) {
+ // uint8_t trunc = i;
+ // A[trunc] = 42;
+ // }
+ // This consecutively iterates twice over A. If `trunc` is sign-extended,
+ // we would conclude that this may iterate backwards over the array.
+ // However, LoopCacheAnalysis is heuristic anyway and transformations must
+ // not result in wrong optimizations if the heuristic was incorrect.
+ Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType),
+ SE.getNoopOrSignExtend(ElemSize, WiderType));
const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride;
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 29c2437ff5ea..693b9ebd450a 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -425,12 +425,12 @@ bool Loop::isCanonical(ScalarEvolution &SE) const {
// Check that 'BB' doesn't have any uses outside of the 'L'
static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
- const DominatorTree &DT) {
+ const DominatorTree &DT, bool IgnoreTokens) {
for (const Instruction &I : BB) {
// Tokens can't be used in PHI nodes and live-out tokens prevent loop
// optimizations, so for the purposes of considered LCSSA form, we
// can ignore them.
- if (I.getType()->isTokenTy())
+ if (IgnoreTokens && I.getType()->isTokenTy())
continue;
for (const Use &U : I.uses()) {
@@ -455,20 +455,20 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
return true;
}
-bool Loop::isLCSSAForm(const DominatorTree &DT) const {
+bool Loop::isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens) const {
// For each block we check that it doesn't have any uses outside of this loop.
return all_of(this->blocks(), [&](const BasicBlock *BB) {
- return isBlockInLCSSAForm(*this, *BB, DT);
+ return isBlockInLCSSAForm(*this, *BB, DT, IgnoreTokens);
});
}
-bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT,
- const LoopInfo &LI) const {
+bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI,
+ bool IgnoreTokens) const {
// For each block we check that it doesn't have any uses outside of its
// innermost loop. This process will transitively guarantee that the current
// loop and all of the nested loops are in LCSSA form.
return all_of(this->blocks(), [&](const BasicBlock *BB) {
- return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
+ return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT, IgnoreTokens);
});
}
@@ -482,11 +482,8 @@ bool Loop::isLoopSimplifyForm() const {
bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
- // FIXME: it should be ok to clone CallBrInst's if we correctly update the
- // operand list to reflect the newly cloned labels.
for (BasicBlock *BB : this->blocks()) {
- if (isa<IndirectBrInst>(BB->getTerminator()) ||
- isa<CallBrInst>(BB->getTerminator()))
+ if (isa<IndirectBrInst>(BB->getTerminator()))
return false;
for (Instruction &I : *BB)
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index f5b121c98ec4..31e4380e4379 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -179,6 +179,11 @@ static const Function *getCalledFunction(const Value *V,
static Optional<AllocFnsTy>
getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
const TargetLibraryInfo *TLI) {
+ // Don't perform a slow TLI lookup, if this function doesn't return a pointer
+ // and thus can't be an allocation function.
+ if (!Callee->getReturnType()->isPointerTy())
+ return None;
+
// Make sure that the function is available.
LibFunc TLIFn;
if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
@@ -266,15 +271,42 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V,
return Result;
}
+static AllocFnKind getAllocFnKind(const Value *V) {
+ if (const auto *CB = dyn_cast<CallBase>(V)) {
+ Attribute Attr = CB->getFnAttr(Attribute::AllocKind);
+ if (Attr.isValid())
+ return AllocFnKind(Attr.getValueAsInt());
+ }
+ return AllocFnKind::Unknown;
+}
+
+static AllocFnKind getAllocFnKind(const Function *F) {
+ Attribute Attr = F->getFnAttribute(Attribute::AllocKind);
+ if (Attr.isValid())
+ return AllocFnKind(Attr.getValueAsInt());
+ return AllocFnKind::Unknown;
+}
+
+static bool checkFnAllocKind(const Value *V, AllocFnKind Wanted) {
+ return (getAllocFnKind(V) & Wanted) != AllocFnKind::Unknown;
+}
+
+static bool checkFnAllocKind(const Function *F, AllocFnKind Wanted) {
+ return (getAllocFnKind(F) & Wanted) != AllocFnKind::Unknown;
+}
+
/// Tests if a value is a call or invoke to a library function that
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, AnyAlloc, TLI).has_value();
+ return getAllocationData(V, AnyAlloc, TLI).has_value() ||
+ checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc);
}
bool llvm::isAllocationFn(
- const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
- return getAllocationData(V, AnyAlloc, GetTLI).has_value();
+ const Value *V,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+ return getAllocationData(V, AnyAlloc, GetTLI).has_value() ||
+ checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc);
}
/// Tests if a value is a call or invoke to a library function that
@@ -304,30 +336,36 @@ bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
/// Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, AllocLike, TLI).has_value();
-}
-
-/// Tests if a value is a call or invoke to a library function that
-/// reallocates memory (e.g., realloc).
-bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
- return getAllocationData(V, ReallocLike, TLI).has_value();
+ return getAllocationData(V, AllocLike, TLI).has_value() ||
+ checkFnAllocKind(V, AllocFnKind::Alloc);
}
/// Tests if a function is a library function that reallocates memory
/// (e.g., realloc).
bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
- return getAllocationDataForFunction(F, ReallocLike, TLI).has_value();
+ return getAllocationDataForFunction(F, ReallocLike, TLI).has_value() ||
+ checkFnAllocKind(F, AllocFnKind::Realloc);
}
-bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) {
- assert(isAllocationFn(CB, TLI));
+Value *llvm::getReallocatedOperand(const CallBase *CB,
+ const TargetLibraryInfo *TLI) {
+ if (getAllocationData(CB, ReallocLike, TLI).has_value()) {
+ // All currently supported realloc functions reallocate the first argument.
+ return CB->getArgOperand(0);
+ }
+ if (checkFnAllocKind(CB, AllocFnKind::Realloc))
+ return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer);
+ return nullptr;
+}
+bool llvm::isRemovableAlloc(const CallBase *CB, const TargetLibraryInfo *TLI) {
// Note: Removability is highly dependent on the source language. For
// example, recent C++ requires direct calls to the global allocation
// [basic.stc.dynamic.allocation] to be observable unless part of a new
// expression [expr.new paragraph 13].
- // Historically we've treated the C family allocation routines as removable
+ // Historically we've treated the C family allocation routines and operator
+ // new as removable.
return isAllocLikeFn(CB, TLI);
}
@@ -357,9 +395,8 @@ static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) {
}
Optional<APInt>
-llvm::getAllocSize(const CallBase *CB,
- const TargetLibraryInfo *TLI,
- std::function<const Value*(const Value*)> Mapper) {
+llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI,
+ function_ref<const Value *(const Value *)> Mapper) {
// Note: This handles both explicitly listed allocation functions and
// allocsize. The code structure could stand to be cleaned up a bit.
Optional<AllocFnsTy> FnData = getAllocationSize(CB, TLI);
@@ -434,6 +471,12 @@ Constant *llvm::getInitialValueOfAllocation(const Value *V,
if (isCallocLikeFn(Alloc, TLI))
return Constant::getNullValue(Ty);
+ AllocFnKind AK = getAllocFnKind(Alloc);
+ if ((AK & AllocFnKind::Uninitialized) != AllocFnKind::Unknown)
+ return UndefValue::get(Ty);
+ if ((AK & AllocFnKind::Zeroed) != AllocFnKind::Unknown)
+ return Constant::getNullValue(Ty);
+
return nullptr;
}
@@ -497,14 +540,23 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I,
if (Callee == nullptr || IsNoBuiltin)
return None;
LibFunc TLIFn;
- if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
- return None;
- const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI);
- if (AllocData)
- return mangledNameForMallocFamily(AllocData.value().Family);
- const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn);
- if (FreeData)
- return mangledNameForMallocFamily(FreeData.value().Family);
+
+ if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn)) {
+ // Callee is some known library function.
+ const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI);
+ if (AllocData)
+ return mangledNameForMallocFamily(AllocData.value().Family);
+ const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn);
+ if (FreeData)
+ return mangledNameForMallocFamily(FreeData.value().Family);
+ }
+ // Callee isn't a known library function; still check the attributes.
+ if (checkFnAllocKind(I, AllocFnKind::Free | AllocFnKind::Alloc |
+ AllocFnKind::Realloc)) {
+ Attribute Attr = cast<CallBase>(I)->getFnAttr("alloc-family");
+ if (Attr.isValid())
+ return Attr.getValueAsString();
+ }
return None;
}
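
With the rewritten lookup, a call whose callee is unknown to TLI can still report a family via the "alloc-family" string attribute. A minimal usage sketch, assuming only the declarations from MemoryBuiltins.h; the wrapper name is invented.

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: report the family of an allocation/free call, whether
// it comes from the TLI table or from an "alloc-family" attribute.
static void printAllocFamily(const CallBase *CB, const TargetLibraryInfo *TLI) {
  if (Optional<StringRef> Family = getAllocationFamily(CB, TLI))
    errs() << "allocation family: " << *Family << "\n";
  else
    errs() << "no known allocation family\n";
}
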
@@ -512,7 +564,7 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I,
bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
Optional<FreeFnsTy> FnData = getFreeFunctionDataForFunction(F, TLIFn);
if (!FnData)
- return false;
+ return checkFnAllocKind(F, AllocFnKind::Free);
// Check free prototype.
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
@@ -528,20 +580,24 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
return true;
}
-/// isFreeCall - Returns non-null if the value is a call to the builtin free()
-const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
+Value *llvm::getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI) {
bool IsNoBuiltinCall;
- const Function *Callee = getCalledFunction(I, IsNoBuiltinCall);
+ const Function *Callee = getCalledFunction(CB, IsNoBuiltinCall);
if (Callee == nullptr || IsNoBuiltinCall)
return nullptr;
LibFunc TLIFn;
- if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
- return nullptr;
+ if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn) &&
+ isLibFreeFunction(Callee, TLIFn)) {
+ // All currently supported free functions free the first argument.
+ return CB->getArgOperand(0);
+ }
- return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr;
-}
+ if (checkFnAllocKind(CB, AllocFnKind::Free))
+ return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer);
+ return nullptr;
+}
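
Call sites that previously used isFreeCall() now ask for the freed operand directly; the MemoryDependenceAnalysis hunk below does exactly this. A minimal sketch of the new pattern, assuming only the declarations from MemoryBuiltins.h; the wrapper name is invented.

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical wrapper replacing the old isFreeCall() pattern.
static Value *findFreedPointer(Instruction *I, const TargetLibraryInfo &TLI) {
  if (auto *CB = dyn_cast<CallBase>(I))
    // Argument 0 for known library free functions, or the operand carrying
    // the allocated-pointer attribute for callees tagged allockind("free").
    return getFreedOperand(CB, &TLI);
  return nullptr;
}
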
//===----------------------------------------------------------------------===//
// Utility functions to compute size of objects.
@@ -765,8 +821,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) {
- auto Mapper = [](const Value *V) { return V; };
- if (Optional<APInt> Size = getAllocSize(&CB, TLI, Mapper))
+ if (Optional<APInt> Size = getAllocSize(&CB, TLI))
return std::make_pair(*Size, Zero);
return unknown();
}
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 690d575ef979..fce9d5b24faf 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -139,10 +139,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
return ModRefInfo::ModRef;
}
- if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
- // calls to free() deallocate the entire structure
- Loc = MemoryLocation::getAfter(CI->getArgOperand(0));
- return ModRefInfo::Mod;
+ if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
+ if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
+ // calls to free() deallocate the entire structure
+ Loc = MemoryLocation::getAfter(FreedOp);
+ return ModRefInfo::Mod;
+ }
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
new file mode 100644
index 000000000000..3d11cb81226e
--- /dev/null
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -0,0 +1,226 @@
+//===-- MemoryProfileInfo.cpp - memory profile info ------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utilities to analyze memory profile information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::memprof;
+
+#define DEBUG_TYPE "memory-profile-info"
+
+// Upper bound on accesses per byte for marking an allocation cold.
+cl::opt<float> MemProfAccessesPerByteColdThreshold(
+ "memprof-accesses-per-byte-cold-threshold", cl::init(10.0), cl::Hidden,
+ cl::desc("The threshold the accesses per byte must be under to consider "
+ "an allocation cold"));
+
+// Lower bound on lifetime to mark an allocation cold (in addition to accesses
+// per byte above). This is to avoid pessimizing short lived objects.
+cl::opt<unsigned> MemProfMinLifetimeColdThreshold(
+ "memprof-min-lifetime-cold-threshold", cl::init(200), cl::Hidden,
+ cl::desc("The minimum lifetime (s) for an allocation to be considered "
+ "cold"));
+
+AllocationType llvm::memprof::getAllocType(uint64_t MaxAccessCount,
+ uint64_t MinSize,
+ uint64_t MinLifetime) {
+ if (((float)MaxAccessCount) / MinSize < MemProfAccessesPerByteColdThreshold &&
+ // MinLifetime is expected to be in ms, so convert the threshold to ms.
+ MinLifetime >= MemProfMinLifetimeColdThreshold * 1000)
+ return AllocationType::Cold;
+ return AllocationType::NotCold;
+}
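
Concretely, with the default thresholds (10.0 accesses per byte, 200 seconds of lifetime), an allocation is only marked cold when both conditions hold. A standalone sketch with invented numbers; note that MinLifetime is in milliseconds while the threshold option is in seconds.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t MaxAccessCount = 1000, MinSize = 4096, MinLifetime = 250000; // 250 s
  bool Cold = (float)MaxAccessCount / MinSize < 10.0f && // ~0.24 accesses/byte
              MinLifetime >= 200u * 1000;                // at least 200 seconds
  std::printf("allocation is %s\n", Cold ? "cold" : "notcold");
  return 0;
}
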
+
+MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
+ LLVMContext &Ctx) {
+ std::vector<Metadata *> StackVals;
+ for (auto Id : CallStack) {
+ auto *StackValMD =
+ ValueAsMetadata::get(ConstantInt::get(Type::getInt64Ty(Ctx), Id));
+ StackVals.push_back(StackValMD);
+ }
+ return MDNode::get(Ctx, StackVals);
+}
+
+MDNode *llvm::memprof::getMIBStackNode(const MDNode *MIB) {
+ assert(MIB->getNumOperands() == 2);
+ // The stack metadata is the first operand of each memprof MIB metadata.
+ return cast<MDNode>(MIB->getOperand(0));
+}
+
+AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) {
+ assert(MIB->getNumOperands() == 2);
+ // The allocation type is currently the second operand of each memprof
+ // MIB metadata. This will need to change as we add additional allocation
+ // types that can be applied based on the allocation profile data.
+ auto *MDS = dyn_cast<MDString>(MIB->getOperand(1));
+ assert(MDS);
+ if (MDS->getString().equals("cold"))
+ return AllocationType::Cold;
+ return AllocationType::NotCold;
+}
+
+static std::string getAllocTypeAttributeString(AllocationType Type) {
+ switch (Type) {
+ case AllocationType::NotCold:
+ return "notcold";
+ break;
+ case AllocationType::Cold:
+ return "cold";
+ break;
+ default:
+ assert(false && "Unexpected alloc type");
+ }
+ llvm_unreachable("invalid alloc type");
+}
+
+static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI,
+ AllocationType AllocType) {
+ auto AllocTypeString = getAllocTypeAttributeString(AllocType);
+ auto A = llvm::Attribute::get(Ctx, "memprof", AllocTypeString);
+ CI->addFnAttr(A);
+}
+
+static bool hasSingleAllocType(uint8_t AllocTypes) {
+ const unsigned NumAllocTypes = countPopulation(AllocTypes);
+ assert(NumAllocTypes != 0);
+ return NumAllocTypes == 1;
+}
+
+void CallStackTrie::addCallStack(AllocationType AllocType,
+ ArrayRef<uint64_t> StackIds) {
+ bool First = true;
+ CallStackTrieNode *Curr = nullptr;
+ for (auto StackId : StackIds) {
+ // If this is the first stack frame, add or update alloc node.
+ if (First) {
+ First = false;
+ if (Alloc) {
+ assert(AllocStackId == StackId);
+ Alloc->AllocTypes |= static_cast<uint8_t>(AllocType);
+ } else {
+ AllocStackId = StackId;
+ Alloc = new CallStackTrieNode(AllocType);
+ }
+ Curr = Alloc;
+ continue;
+ }
+ // Update existing caller node if it exists.
+ auto Next = Curr->Callers.find(StackId);
+ if (Next != Curr->Callers.end()) {
+ Curr = Next->second;
+ Curr->AllocTypes |= static_cast<uint8_t>(AllocType);
+ continue;
+ }
+ // Otherwise add a new caller node.
+ auto *New = new CallStackTrieNode(AllocType);
+ Curr->Callers[StackId] = New;
+ Curr = New;
+ }
+ assert(Curr);
+}
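
For example, two contexts that share the allocation frame and its immediate caller end up as a single branch point in the trie. A sketch assuming the declarations from llvm/Analysis/MemoryProfileInfo.h; the stack IDs are invented and ordered leaf (allocation site) first, matching addCallStack.

#include "llvm/Analysis/MemoryProfileInfo.h"
using namespace llvm::memprof;

static void buildExampleTrie() {
  CallStackTrie Trie;
  // Context A: alloc frame 1, called from 2, called from 3 -> cold.
  Trie.addCallStack(AllocationType::Cold, {1, 2, 3});
  // Context B: same alloc frame and immediate caller, different outer caller.
  Trie.addCallStack(AllocationType::NotCold, {1, 2, 4});
  // Resulting trie: node 1 (Cold|NotCold) -> node 2 (Cold|NotCold)
  //                 -> node 3 (Cold) and node 4 (NotCold).
}
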
+
+void CallStackTrie::addCallStack(MDNode *MIB) {
+ MDNode *StackMD = getMIBStackNode(MIB);
+ assert(StackMD);
+ std::vector<uint64_t> CallStack;
+ CallStack.reserve(StackMD->getNumOperands());
+ for (auto &MIBStackIter : StackMD->operands()) {
+ auto *StackId = mdconst::dyn_extract<ConstantInt>(MIBStackIter);
+ assert(StackId);
+ CallStack.push_back(StackId->getZExtValue());
+ }
+ addCallStack(getMIBAllocType(MIB), CallStack);
+}
+
+static MDNode *createMIBNode(LLVMContext &Ctx,
+ std::vector<uint64_t> &MIBCallStack,
+ AllocationType AllocType) {
+ std::vector<Metadata *> MIBPayload(
+ {buildCallstackMetadata(MIBCallStack, Ctx)});
+ MIBPayload.push_back(
+ MDString::get(Ctx, getAllocTypeAttributeString(AllocType)));
+ return MDNode::get(Ctx, MIBPayload);
+}
+
+// Recursive helper to trim contexts and create metadata nodes.
+// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
+// caller makes it simpler to handle the many early returns in this method.
+bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
+ std::vector<uint64_t> &MIBCallStack,
+ std::vector<Metadata *> &MIBNodes,
+ bool CalleeHasAmbiguousCallerContext) {
+ // Trim context below the first node in a prefix with a single alloc type.
+ // Add an MIB record for the current call stack prefix.
+ if (hasSingleAllocType(Node->AllocTypes)) {
+ MIBNodes.push_back(
+ createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes));
+ return true;
+ }
+
+ // We don't have a single allocation for all the contexts sharing this prefix,
+ // so recursively descend into callers in trie.
+ if (!Node->Callers.empty()) {
+ bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1;
+ bool AddedMIBNodesForAllCallerContexts = true;
+ for (auto &Caller : Node->Callers) {
+ MIBCallStack.push_back(Caller.first);
+ AddedMIBNodesForAllCallerContexts &=
+ buildMIBNodes(Caller.second, Ctx, MIBCallStack, MIBNodes,
+ NodeHasAmbiguousCallerContext);
+ // Remove Caller.
+ MIBCallStack.pop_back();
+ }
+ if (AddedMIBNodesForAllCallerContexts)
+ return true;
+ // We expect that the callers should be forced to add MIBs to disambiguate
+ // the context in this case (see below).
+ assert(!NodeHasAmbiguousCallerContext);
+ }
+
+ // If we reached here, then this node does not have a single allocation type,
+ // and we didn't add metadata for a longer call stack prefix including any of
+ // Node's callers. That means we never hit a single allocation type along all
+ // call stacks with this prefix. This can happen due to recursion collapsing
+ // or the stack being deeper than tracked by the profiler runtime, leading to
+ // contexts with different allocation types being merged. In that case, we
+ // trim the context just below the deepest context split, which is this
+ // node if the callee has an ambiguous caller context (multiple callers),
+ // since the recursive calls above returned false. Conservatively give it
+ // non-cold allocation type.
+ if (!CalleeHasAmbiguousCallerContext)
+ return false;
+ MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold));
+ return true;
+}
+
+// Build and attach the minimal necessary MIB metadata. If the alloc has a
+// single allocation type, add a function attribute instead. Returns true if
+// memprof metadata was attached, false if not (attribute added).
+bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
+ auto &Ctx = CI->getContext();
+ if (hasSingleAllocType(Alloc->AllocTypes)) {
+ addAllocTypeAttribute(Ctx, CI, (AllocationType)Alloc->AllocTypes);
+ return false;
+ }
+ std::vector<uint64_t> MIBCallStack;
+ MIBCallStack.push_back(AllocStackId);
+ std::vector<Metadata *> MIBNodes;
+ assert(!Alloc->Callers.empty() && "addCallStack has not been called yet");
+ buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes,
+ /*CalleeHasAmbiguousCallerContext=*/true);
+ assert(MIBCallStack.size() == 1 &&
+ "Should only be left with Alloc's location in stack");
+ CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
+ return true;
+}
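
End to end, the trie either collapses to a single "memprof" function attribute or emits one MIB node per surviving context. A sketch under the same assumptions as above, building a throwaway call with IRBuilder just to have something to annotate; it is illustrative, not how the pass pipeline drives this code.

#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;
using namespace llvm::memprof;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  IRBuilder<> B(Ctx);
  // A stand-in allocation call: declare i8* @malloc(i64) and call it.
  FunctionType *MallocTy =
      FunctionType::get(B.getInt8PtrTy(), {B.getInt64Ty()}, false);
  FunctionCallee Malloc = M.getOrInsertFunction("malloc", MallocTy);
  Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                 GlobalValue::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(BB);
  CallInst *CI = B.CreateCall(Malloc, {B.getInt64(8)});
  B.CreateRetVoid();

  CallStackTrie Trie;
  Trie.addCallStack(AllocationType::Cold, {1, 2, 3});
  Trie.addCallStack(AllocationType::NotCold, {1, 2, 4});
  // Two distinct contexts survive, so !memprof metadata with two MIB nodes is
  // attached and this returns true; with a single allocation type it would
  // instead add a "memprof" function attribute and return false.
  bool AttachedMD = Trie.buildAndAttachMIBMetadata(CI);
  (void)AttachedMD;
}
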
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index eb75118210b9..9ad60f774e9f 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -451,7 +451,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
}
// Existing Phi blocks may need renaming too, if an access was previously
// optimized and the inserted Def "covers" the Optimized value.
- for (auto &MP : ExistingPhis) {
+ for (const auto &MP : ExistingPhis) {
MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP);
if (Phi)
MSSA->renamePass(Phi->getBlock(), nullptr, Visited);
@@ -462,7 +462,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
SmallPtrSet<const BasicBlock *, 8> Seen;
SmallVector<const BasicBlock *, 16> Worklist;
- for (auto &Var : Vars) {
+ for (const auto &Var : Vars) {
MemoryAccess *NewDef = dyn_cast_or_null<MemoryAccess>(Var);
if (!NewDef)
continue;
@@ -744,10 +744,10 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
};
- for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
ProcessBlock(BB);
- for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
@@ -811,7 +811,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
SmallVector<CFGUpdate, 4> DeleteUpdates;
SmallVector<CFGUpdate, 4> RevDeleteUpdates;
SmallVector<CFGUpdate, 4> InsertUpdates;
- for (auto &Update : Updates) {
+ for (const auto &Update : Updates) {
if (Update.getKind() == DT.Insert)
InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
else {
@@ -958,7 +958,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
};
SmallDenseMap<BasicBlock *, PredInfo> PredMap;
- for (auto &Edge : Updates) {
+ for (const auto &Edge : Updates) {
BasicBlock *BB = Edge.getTo();
auto &AddedBlockSet = PredMap[BB].Added;
AddedBlockSet.insert(Edge.getFrom());
@@ -1003,7 +1003,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
// First create MemoryPhis in all blocks that don't have one. Create in the
// order found in Updates, not in PredMap, to get deterministic numbering.
- for (auto &Edge : Updates) {
+ for (const auto &Edge : Updates) {
BasicBlock *BB = Edge.getTo();
if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
InsertedPhis.push_back(MSSA->createMemoryPhi(BB));
@@ -1400,7 +1400,7 @@ void MemorySSAUpdater::removeBlocks(
}
void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) {
- for (auto &VH : UpdatedPHIs)
+ for (const auto &VH : UpdatedPHIs)
if (auto *MPhi = cast_or_null<MemoryPhi>(VH))
tryRemoveTrivialPhi(MPhi);
}
diff --git a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 373aaa48b1d1..756f92e1aac4 100644
--- a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -95,7 +95,7 @@ static void printModuleDebugInfo(raw_ostream &O, const Module *M,
O << '\n';
}
- for (auto GVU : Finder.global_variables()) {
+ for (auto *GVU : Finder.global_variables()) {
const auto *GV = GVU->getVariable();
O << "Global variable: " << GV->getName();
printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 2b98634ef7bf..c52b27a38fe9 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -400,7 +400,7 @@ static void computeFunctionSummary(
// to enable importing for subsequent indirect call promotion and
// inlining.
if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
- for (auto &Op : MD->operands()) {
+ for (const auto &Op : MD->operands()) {
Function *Callee = mdconst::extract_or_null<Function>(Op);
if (Callee)
CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
@@ -412,7 +412,7 @@ static void computeFunctionSummary(
auto CandidateProfileData =
ICallAnalysis.getPromotionCandidatesForInstruction(
&I, NumVals, TotalCount, NumCandidates);
- for (auto &Candidate : CandidateProfileData)
+ for (const auto &Candidate : CandidateProfileData)
CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
.updateHotness(getHotness(Candidate.Count, PSI));
}
@@ -451,7 +451,7 @@ static void computeFunctionSummary(
// If both load and store instructions reference the same variable,
// we won't be able to optimize it. Add all such reference edges
// to RefEdges set.
- for (auto &VI : StoreRefEdges)
+ for (const auto &VI : StoreRefEdges)
if (LoadRefEdges.remove(VI))
RefEdges.insert(VI);
@@ -459,11 +459,11 @@ static void computeFunctionSummary(
// All new reference edges inserted in the two loops below are either
// read-only or write-only. They will be grouped at the end of the RefEdges
// vector, so we can use a single integer value to identify them.
- for (auto &VI : LoadRefEdges)
+ for (const auto &VI : LoadRefEdges)
RefEdges.insert(VI);
unsigned FirstWORef = RefEdges.size();
- for (auto &VI : StoreRefEdges)
+ for (const auto &VI : StoreRefEdges)
RefEdges.insert(VI);
Refs = RefEdges.takeVector();
@@ -646,15 +646,18 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
Index.addGlobalValueSummary(V, std::move(GVarSummary));
}
-static void
-computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
- DenseSet<GlobalValue::GUID> &CantBePromoted) {
+static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
+ DenseSet<GlobalValue::GUID> &CantBePromoted) {
+ // Skip the summary for indirect function aliases, as the summary for the
+ // aliasee will not be emitted.
+ const GlobalObject *Aliasee = A.getAliaseeObject();
+ if (isa<GlobalIFunc>(Aliasee))
+ return;
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(
A.getLinkage(), A.getVisibility(), NonRenamableLocal,
/* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable());
auto AS = std::make_unique<AliasSummary>(Flags);
- auto *Aliasee = A.getAliaseeObject();
auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
assert(AliaseeVI && "Alias expects aliasee summary to be available");
assert(AliaseeVI.getSummaryList().size() == 1 &&
@@ -668,7 +671,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
// Set LiveRoot flag on entries matching the given value name.
static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
- for (auto &Summary : VI.getSummaryList())
+ for (const auto &Summary : VI.getSummaryList())
Summary->setLive(true);
}
@@ -776,7 +779,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// Compute summaries for all functions defined in module, and save in the
// index.
- for (auto &F : M) {
+ for (const auto &F : M) {
if (F.isDeclaration())
continue;
@@ -811,6 +814,13 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
for (const GlobalAlias &A : M.aliases())
computeAliasSummary(Index, A, CantBePromoted);
+ // Iterate through ifuncs and mark everything along their resolver paths as live.
+ for (const GlobalIFunc &I : M.ifuncs()) {
+ I.applyAlongResolverPath([&Index](const GlobalValue &GV) {
+ Index.getGlobalValueSummary(GV)->setLive(true);
+ });
+ }
+
for (auto *V : LocalsUsed) {
auto *Summary = Index.getGlobalValueSummary(*V);
assert(Summary && "Missing summary for global value");
diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp
index ad8322d7bd79..ac6590c1d8a2 100644
--- a/llvm/lib/Analysis/MustExecute.cpp
+++ b/llvm/lib/Analysis/MustExecute.cpp
@@ -81,7 +81,7 @@ void ICFLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
MW.clear();
MayThrow = false;
// Figure out the fact that at least one block may throw.
- for (auto &BB : CurLoop->blocks())
+ for (const auto &BB : CurLoop->blocks())
if (ICF.hasICF(&*BB)) {
MayThrow = true;
break;
@@ -164,7 +164,7 @@ static void collectTransitivePredecessors(
if (BB == CurLoop->getHeader())
return;
SmallVector<const BasicBlock *, 4> WorkList;
- for (auto *Pred : predecessors(BB)) {
+ for (const auto *Pred : predecessors(BB)) {
Predecessors.insert(Pred);
WorkList.push_back(Pred);
}
@@ -180,7 +180,7 @@ static void collectTransitivePredecessors(
// @nested and @nested_no_throw in test/Analysis/MustExecute/loop-header.ll.
// We can ignore the backedges of all loops containing BB to get a slightly more
// optimistic result.
- for (auto *PredPred : predecessors(Pred))
+ for (const auto *PredPred : predecessors(Pred))
if (Predecessors.insert(PredPred).second)
WorkList.push_back(PredPred);
}
@@ -207,7 +207,7 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
// 3) Exit blocks which are not taken on 1st iteration.
// Memoize blocks we've already checked.
SmallPtrSet<const BasicBlock *, 4> CheckedSuccessors;
- for (auto *Pred : Predecessors) {
+ for (const auto *Pred : Predecessors) {
// Predecessor block may throw, so it has a side exit.
if (blockMayThrow(Pred))
return false;
@@ -217,7 +217,7 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
if (DT->dominates(BB, Pred))
continue;
- for (auto *Succ : successors(Pred))
+ for (const auto *Succ : successors(Pred))
if (CheckedSuccessors.insert(Succ).second &&
Succ != BB && !Predecessors.count(Succ))
// By discharging conditions that are not executed on the 1st iteration,
@@ -285,7 +285,7 @@ bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB,
collectTransitivePredecessors(CurLoop, BB, Predecessors);
// Check whether any instruction in any of the predecessors could write
// to memory.
- for (auto *Pred : Predecessors)
+ for (const auto *Pred : Predecessors)
if (MW.mayWriteToMemory(Pred))
return false;
return true;
@@ -413,7 +413,7 @@ class MustExecuteAnnotatedWriter : public AssemblyAnnotationWriter {
public:
MustExecuteAnnotatedWriter(const Function &F,
DominatorTree &DT, LoopInfo &LI) {
- for (auto &I: instructions(F)) {
+ for (const auto &I: instructions(F)) {
Loop *L = LI.getLoopFor(I.getParent());
while (L) {
if (isMustExecuteIn(I, L, &DT)) {
@@ -425,8 +425,8 @@ public:
}
MustExecuteAnnotatedWriter(const Module &M,
DominatorTree &DT, LoopInfo &LI) {
- for (auto &F : M)
- for (auto &I: instructions(F)) {
+ for (const auto &F : M)
+ for (const auto &I: instructions(F)) {
Loop *L = LI.getLoopFor(I.getParent());
while (L) {
if (isMustExecuteIn(I, L, &DT)) {
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f61806bd1dad..d46248aa3889 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1158,7 +1158,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
- for (auto *Op : Expr->operands()) {
+ for (const auto *Op : Expr->operands()) {
Operands.push_back(visit(Op));
Changed |= Op != Operands.back();
}
@@ -1168,7 +1168,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
bool Changed = false;
- for (auto *Op : Expr->operands()) {
+ for (const auto *Op : Expr->operands()) {
Operands.push_back(visit(Op));
Changed |= Op != Operands.back();
}
@@ -4662,7 +4662,7 @@ ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops,
// Find the max type first.
Type *MaxType = nullptr;
- for (auto *S : Ops)
+ for (const auto *S : Ops)
if (MaxType)
MaxType = getWiderType(MaxType, S->getType());
else
@@ -4671,7 +4671,7 @@ ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops,
// Extend all ops to max type.
SmallVector<const SCEV *, 2> PromotedOps;
- for (auto *S : Ops)
+ for (const auto *S : Ops)
PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));
// Generate umin.
@@ -6636,7 +6636,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// Make sure that we do not run over cycled Phis.
if (PendingPhiRanges.insert(Phi).second) {
ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
- for (auto &Op : Phi->operands()) {
+ for (const auto &Op : Phi->operands()) {
auto OpRange = getRangeRef(getSCEV(Op), SignHint);
RangeFromOps = RangeFromOps.unionWith(OpRange);
// No point to continue if we already have a full set.
@@ -6651,6 +6651,13 @@ ScalarEvolution::getRangeRef(const SCEV *S,
}
}
+ // vscale can't be equal to zero
+ if (const auto *II = dyn_cast<IntrinsicInst>(U->getValue()))
+ if (II->getIntrinsicID() == Intrinsic::vscale) {
+ ConstantRange Disallowed = APInt::getZero(BitWidth);
+ ConservativeResult = ConservativeResult.difference(Disallowed);
+ }
+
return setRange(U, SignHint, std::move(ConservativeResult));
}
@@ -6973,13 +6980,13 @@ static void collectUniqueOps(const SCEV *S,
Ops.push_back(S);
};
if (auto *S2 = dyn_cast<SCEVCastExpr>(S))
- for (auto *Op : S2->operands())
+ for (const auto *Op : S2->operands())
InsertUnique(Op);
else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S))
- for (auto *Op : S2->operands())
+ for (const auto *Op : S2->operands())
InsertUnique(Op);
else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S))
- for (auto *Op : S2->operands())
+ for (const auto *Op : S2->operands())
InsertUnique(Op);
}
@@ -7001,7 +7008,7 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
Worklist.push_back(S);
};
- for (auto *S : Ops)
+ for (const auto *S : Ops)
pushOp(S);
const Instruction *Bound = nullptr;
@@ -7013,7 +7020,7 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
} else {
SmallVector<const SCEV *, 4> Ops;
collectUniqueOps(S, Ops);
- for (auto *Op : Ops)
+ for (const auto *Op : Ops)
pushOp(Op);
}
}
@@ -7117,7 +7124,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
while (!PoisonStack.empty() && !LatchControlDependentOnPoison) {
const Instruction *Poison = PoisonStack.pop_back_val();
- for (auto *PoisonUser : Poison->users()) {
+ for (const auto *PoisonUser : Poison->users()) {
if (propagatesPoison(cast<Operator>(PoisonUser))) {
if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
PoisonStack.push_back(cast<Instruction>(PoisonUser));
@@ -7242,7 +7249,7 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
Operator *U = cast<Operator>(V);
if (auto BO = MatchBinaryOp(U, DT)) {
bool IsConstArg = isa<ConstantInt>(BO->RHS);
- switch (U->getOpcode()) {
+ switch (BO->Opcode) {
case Instruction::Add: {
// For additions and multiplications, traverse add/mul chains for which we
// can potentially create a single SCEV, to reduce the number of
@@ -7284,7 +7291,10 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
} while (true);
return nullptr;
}
-
+ case Instruction::Sub:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ break;
case Instruction::AShr:
case Instruction::Shl:
case Instruction::Xor:
@@ -7296,7 +7306,10 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1))
return nullptr;
break;
+ case Instruction::LShr:
+ return getUnknown(V);
default:
+ llvm_unreachable("Unhandled binop");
break;
}
@@ -7340,12 +7353,34 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
// Keep constructing SCEVs for phis recursively for now.
return nullptr;
- case Instruction::Select:
+ case Instruction::Select: {
+ // Check if U is a select that can be simplified to a SCEVUnknown.
+ auto CanSimplifyToUnknown = [this, U]() {
+ if (U->getType()->isIntegerTy(1) || isa<ConstantInt>(U->getOperand(0)))
+ return false;
+
+ auto *ICI = dyn_cast<ICmpInst>(U->getOperand(0));
+ if (!ICI)
+ return false;
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ if (ICI->getPredicate() == CmpInst::ICMP_EQ ||
+ ICI->getPredicate() == CmpInst::ICMP_NE) {
+ if (!(isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()))
+ return true;
+ } else if (getTypeSizeInBits(LHS->getType()) >
+ getTypeSizeInBits(U->getType()))
+ return true;
+ return false;
+ };
+ if (CanSimplifyToUnknown())
+ return getUnknown(U);
+
for (Value *Inc : U->operands())
Ops.push_back(Inc);
return nullptr;
break;
-
+ }
case Instruction::Call:
case Instruction::Invoke:
if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) {
@@ -8338,7 +8373,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
// All exiting blocks we have gathered dominate the loop's latch, so the
// exact trip count is simply the minimum of all these calculated exit counts.
SmallVector<const SCEV *, 2> Ops;
- for (auto &ENT : ExitNotTaken) {
+ for (const auto &ENT : ExitNotTaken) {
const SCEV *BECount = ENT.ExactNotTaken;
assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!");
assert(SE->DT.dominates(ENT.ExitingBlock, Latch) &&
@@ -8348,7 +8383,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
Ops.push_back(BECount);
if (Preds)
- for (auto *P : ENT.Predicates)
+ for (const auto *P : ENT.Predicates)
Preds->push_back(P);
assert((Preds || ENT.hasAlwaysTruePredicate()) &&
@@ -8365,7 +8400,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE,
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
- for (auto &ENT : ExitNotTaken)
+ for (const auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.ExactNotTaken;
@@ -8374,7 +8409,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock,
const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
const BasicBlock *ExitingBlock, ScalarEvolution *SE) const {
- for (auto &ENT : ExitNotTaken)
+ for (const auto &ENT : ExitNotTaken)
if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.MaxNotTaken;
@@ -8433,8 +8468,8 @@ ScalarEvolution::ExitLimit::ExitLimit(
assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
isa<SCEVConstant>(MaxNotTaken)) &&
"No point in having a non-constant max backedge taken count!");
- for (auto *PredSet : PredSetList)
- for (auto *P : *PredSet)
+ for (const auto *PredSet : PredSetList)
+ for (const auto *P : *PredSet)
addPredicate(P);
assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) &&
"Backedge count should be int");
@@ -10522,8 +10557,8 @@ bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred,
// Domination relationship must be a linear order on collected loops.
#ifndef NDEBUG
- for (auto *L1 : LoopsUsed)
- for (auto *L2 : LoopsUsed)
+ for (const auto *L1 : LoopsUsed)
+ for (const auto *L2 : LoopsUsed)
assert((DT.dominates(L1->getHeader(), L2->getHeader()) ||
DT.dominates(L2->getHeader(), L1->getHeader())) &&
"Domination relationship is not a linear order");
@@ -10977,8 +11012,10 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// Interpret a null as meaning no loop, where there is obviously no guard
- // (interprocedural conditions notwithstanding).
- if (!L) return true;
+ // (interprocedural conditions notwithstanding). Do not bother about
+ // unreachable loops.
+ if (!L || !DT.isReachableFromEntry(L->getHeader()))
+ return true;
if (VerifyIR)
assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
@@ -11035,12 +11072,6 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
return true;
}
- // If the loop is not reachable from the entry block, we risk running into an
- // infinite loop as we walk up into the dom tree. These loops do not matter
- // anyway, so we just return a conservative answer when we see them.
- if (!DT.isReachableFromEntry(L->getHeader()))
- return false;
-
if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
return true;
@@ -11086,6 +11117,9 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS) {
+ // Do not bother proving facts for unreachable code.
+ if (!DT.isReachableFromEntry(BB))
+ return true;
if (VerifyIR)
assert(!verifyFunction(*BB->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
@@ -11162,14 +11196,13 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
if (ProveViaGuard(Pair.first))
return true;
- const BranchInst *LoopEntryPredicate =
+ const BranchInst *BlockEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
- if (!LoopEntryPredicate ||
- LoopEntryPredicate->isUnconditional())
+ if (!BlockEntryPredicate || BlockEntryPredicate->isUnconditional())
continue;
- if (ProveViaCond(LoopEntryPredicate->getCondition(),
- LoopEntryPredicate->getSuccessor(0) != Pair.second))
+ if (ProveViaCond(BlockEntryPredicate->getCondition(),
+ BlockEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
@@ -13179,7 +13212,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
if (!isa<SCEVCouldNotCompute>(PBT)) {
OS << "Predicated backedge-taken count is " << *PBT << "\n";
OS << " Predicates:\n";
- for (auto *P : Preds)
+ for (const auto *P : Preds)
P->print(OS, 4);
} else {
OS << "Unpredictable predicated backedge-taken count. ";
@@ -13256,7 +13289,7 @@ void ScalarEvolution::print(raw_ostream &OS) const {
}
bool First = true;
- for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
+ for (const auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
if (First) {
OS << "\t\t" "LoopDispositions: { ";
First = false;
@@ -13268,7 +13301,7 @@ void ScalarEvolution::print(raw_ostream &OS) const {
OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
}
- for (auto *InnerL : depth_first(L)) {
+ for (const auto *InnerL : depth_first(L)) {
if (InnerL == L)
continue;
if (First) {
@@ -13348,7 +13381,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
// This recurrence is variant w.r.t. L if any of its operands
// are variant.
- for (auto *Op : AR->operands())
+ for (const auto *Op : AR->operands())
if (!isLoopInvariant(Op, L))
return LoopVariant;
@@ -13363,7 +13396,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
case scSMinExpr:
case scSequentialUMinExpr: {
bool HasVarying = false;
- for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+ for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
LoopDisposition D = getLoopDisposition(Op, L);
if (D == LoopVariant)
return LoopVariant;
@@ -13529,12 +13562,12 @@ void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
const SCEV *Curr = Worklist.pop_back_val();
auto Users = SCEVUsers.find(Curr);
if (Users != SCEVUsers.end())
- for (auto *User : Users->second)
+ for (const auto *User : Users->second)
if (ToForget.insert(User).second)
Worklist.push_back(User);
}
- for (auto *S : ToForget)
+ for (const auto *S : ToForget)
forgetMemoizedResultsImpl(S);
for (auto I = PredicatedSCEVRewrites.begin();
@@ -13747,7 +13780,7 @@ void ScalarEvolution::verify() const {
if (ValidLoops.insert(L).second)
Worklist.append(L->begin(), L->end());
}
- for (auto &KV : ValueExprMap) {
+ for (const auto &KV : ValueExprMap) {
#ifndef NDEBUG
// Check for SCEV expressions referencing invalid/deleted loops.
if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
@@ -14018,7 +14051,7 @@ public:
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
if (Pred) {
if (auto *U = dyn_cast<SCEVUnionPredicate>(Pred)) {
- for (auto *Pred : U->getPredicates())
+ for (const auto *Pred : U->getPredicates())
if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred))
if (IPred->getLHS() == Expr &&
IPred->getPredicate() == ICmpInst::ICMP_EQ)
@@ -14098,7 +14131,7 @@ private:
PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr);
if (!PredicatedRewrite)
return Expr;
- for (auto *P : PredicatedRewrite->second){
+ for (const auto *P : PredicatedRewrite->second){
// Wrap predicates from outer loops are not supported.
if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) {
if (L != WP->getExpr()->getLoop())
@@ -14135,7 +14168,7 @@ const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
// Since the transformation was successful, we can now transfer the SCEV
// predicates.
- for (auto *P : TransformPreds)
+ for (const auto *P : TransformPreds)
Preds.insert(P);
return AddRec;
@@ -14234,7 +14267,7 @@ SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR,
/// Union predicates don't get cached so create a dummy set ID for it.
SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds)
: SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {
- for (auto *P : Preds)
+ for (const auto *P : Preds)
add(P);
}
@@ -14253,13 +14286,13 @@ bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
}
void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
- for (auto Pred : Preds)
+ for (const auto *Pred : Preds)
Pred->print(OS, Depth);
}
void SCEVUnionPredicate::add(const SCEVPredicate *N) {
if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
- for (auto Pred : Set->Preds)
+ for (const auto *Pred : Set->Preds)
add(Pred);
return;
}
@@ -14276,7 +14309,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
void ScalarEvolution::registerUser(const SCEV *User,
ArrayRef<const SCEV *> Ops) {
- for (auto *Op : Ops)
+ for (const auto *Op : Ops)
// We do not expect that forgetting cached data for SCEVConstants will ever
// open any prospects for sharpening or introduce any correctness issues,
// so we don't bother storing their dependencies.
@@ -14307,7 +14340,7 @@ const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
if (!BackedgeCount) {
SmallVector<const SCEVPredicate *, 4> Preds;
BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds);
- for (auto *P : Preds)
+ for (const auto *P : Preds)
addPredicate(*P);
}
return BackedgeCount;
@@ -14378,7 +14411,7 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
if (!New)
return nullptr;
- for (auto *P : NewPreds)
+ for (const auto *P : NewPreds)
addPredicate(*P);
RewriteMap[SE.getSCEV(V)] = {Generation, New};
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index 52e8566aca3c..162fd75c73e0 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -182,7 +182,7 @@ void StackLifetime::calculateLocalLiveness() {
// Compute LiveIn by unioning together the LiveOut sets of all preds.
BitVector LocalLiveIn;
- for (auto *PredBB : predecessors(BB)) {
+ for (const auto *PredBB : predecessors(BB)) {
LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
// If a predecessor is unreachable, ignore it.
if (I == BlockLiveness.end())
diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 94b646ab7c06..9698af3ca85c 100644
--- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -206,7 +206,7 @@ template <typename CalleeTy> struct FunctionInfo {
O << " allocas uses:\n";
if (F) {
- for (auto &I : instructions(F)) {
+ for (const auto &I : instructions(F)) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
auto &AS = Allocas.find(AI)->second;
O << " " << AI->getName() << "["
@@ -763,7 +763,7 @@ const ConstantRange *findParamAccess(const FunctionSummary &FS,
uint32_t ParamNo) {
assert(FS.isLive());
assert(FS.isDSOLocal());
- for (auto &PS : FS.paramAccesses())
+ for (const auto &PS : FS.paramAccesses())
if (ParamNo == PS.ParamNo)
return &PS.Use;
return nullptr;
@@ -823,7 +823,7 @@ GVToSSI createGlobalStackSafetyInfo(
Copy.begin()->first->getParent()->getDataLayout().getPointerSizeInBits();
StackSafetyDataFlowAnalysis<GlobalValue> SSDFA(PointerSize, std::move(Copy));
- for (auto &F : SSDFA.run()) {
+ for (const auto &F : SSDFA.run()) {
auto FI = F.second;
auto &SrcF = Functions[F.first];
for (auto &KV : FI.Allocas) {
@@ -922,7 +922,7 @@ StackSafetyInfo::getParamAccesses(ModuleSummaryIndex &Index) const {
FunctionSummary::ParamAccess &Param = ParamAccesses.back();
Param.Calls.reserve(PS.Calls.size());
- for (auto &C : PS.Calls) {
+ for (const auto &C : PS.Calls) {
// Parameter forwarded into another function by any or unknown offset
// will make ParamAccess::Range as FullSet anyway. So we can drop the
// entire parameter like we did above.
@@ -978,7 +978,7 @@ void StackSafetyGlobalInfo::print(raw_ostream &O) const {
if (SSI.empty())
return;
const Module &M = *SSI.begin()->first->getParent();
- for (auto &F : M.functions()) {
+ for (const auto &F : M.functions()) {
if (!F.isDeclaration()) {
SSI.find(&F)->second.print(O, F.getName(), &F);
O << " safe accesses:"
@@ -1094,7 +1094,7 @@ bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) {
bool llvm::needsParamAccessSummary(const Module &M) {
if (StackSafetyRun)
return true;
- for (auto &F : M.functions())
+ for (const auto &F : M.functions())
if (F.hasFnAttribute(Attribute::SanitizeMemTag))
return true;
return false;
@@ -1126,13 +1126,13 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
continue;
if (FS->isLive() && FS->isDSOLocal()) {
FunctionInfo<FunctionSummary> FI;
- for (auto &PS : FS->paramAccesses()) {
+ for (const auto &PS : FS->paramAccesses()) {
auto &US =
FI.Params
.emplace(PS.ParamNo, FunctionSummary::ParamAccess::RangeWidth)
.first->second;
US.Range = PS.Use;
- for (auto &Call : PS.Calls) {
+ for (const auto &Call : PS.Calls) {
assert(!Call.Offsets.isFullSet());
FunctionSummary *S =
findCalleeFunctionSummary(Call.Callee, FS->modulePath());
@@ -1158,10 +1158,10 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) {
NumCombinedDataFlowNodes += Functions.size();
StackSafetyDataFlowAnalysis<FunctionSummary> SSDFA(
FunctionSummary::ParamAccess::RangeWidth, std::move(Functions));
- for (auto &KV : SSDFA.run()) {
+ for (const auto &KV : SSDFA.run()) {
std::vector<FunctionSummary::ParamAccess> NewParams;
NewParams.reserve(KV.second.Params.size());
- for (auto &Param : KV.second.Params) {
+ for (const auto &Param : KV.second.Params) {
// It's not needed as FullSet is processed the same as a missing value.
if (Param.second.Range.isFullSet())
continue;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 6e34a8303c08..cfa6e3a97626 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -58,14 +58,16 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
}
IntrinsicCostAttributes::IntrinsicCostAttributes(
- Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost)
+ Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost,
+ bool TypeBasedOnly)
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
ScalarizationCost(ScalarizationCost) {
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
FMF = FPMO->getFastMathFlags();
- Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
+ if (!TypeBasedOnly)
+ Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}
@@ -294,8 +296,8 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
bool TargetTransformInfo::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT,
- const LoopAccessInfo *LAI) const {
- return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+ LoopVectorizationLegality *LVL) const {
+ return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL);
}
PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const {
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 560f46d39d0d..216027778fab 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -303,7 +303,7 @@ public:
/// given offset. Update the offset to be relative to the field type.
TBAAStructTypeNode getField(uint64_t &Offset) const {
bool NewFormat = isNewFormat();
- const ArrayRef<MDOperand> Operands(Node->op_begin(), Node->op_end());
+ const ArrayRef<MDOperand> Operands = Node->operands();
const unsigned NumOperands = Operands.size();
if (NewFormat) {
@@ -811,7 +811,8 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
return nullptr;
// Otherwise, create TBAA with the new Len
- SmallVector<Metadata *, 4> NextNodes(MD->operands());
+ ArrayRef<MDOperand> MDOperands = MD->operands();
+ SmallVector<Metadata *, 4> NextNodes(MDOperands.begin(), MDOperands.end());
ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]);
// Don't create a new MDNode if it is the same length.
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index add2d427e05b..1f3798d1338e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2297,7 +2297,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
return false;
unsigned NumUsesExplored = 0;
- for (auto *U : V->users()) {
+ for (const auto *U : V->users()) {
// Avoid massive lists
if (NumUsesExplored >= DomConditionsMaxUses)
break;
@@ -2338,7 +2338,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
SmallVector<const User *, 4> WorkList;
SmallPtrSet<const User *, 4> Visited;
- for (auto *CmpU : U->users()) {
+ for (const auto *CmpU : U->users()) {
assert(WorkList.empty() && "Should be!");
if (Visited.insert(CmpU).second)
WorkList.push_back(CmpU);
@@ -2352,7 +2352,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
// TODO: Support similar logic of OR and EQ predicate?
if (NonNullIfTrue)
if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
- for (auto *CurrU : Curr->users())
+ for (const auto *CurrU : Curr->users())
if (Visited.insert(CurrU).second)
WorkList.push_back(CurrU);
continue;
@@ -5073,7 +5073,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
if (DT.dominates(NoWrapEdge, Result->getParent()))
continue;
- for (auto &RU : Result->uses())
+ for (const auto &RU : Result->uses())
if (!DT.dominates(NoWrapEdge, RU))
return false;
}
@@ -5645,7 +5645,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
// whether a value is directly passed to an instruction that must take
// well-defined operands.
- for (auto &I : make_range(Begin, End)) {
+ for (const auto &I : make_range(Begin, End)) {
if (isa<DbgInfoIntrinsic>(I))
continue;
if (--ScanLimit == 0)
@@ -5676,7 +5676,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
Visited.insert(BB);
while (true) {
- for (auto &I : make_range(Begin, End)) {
+ for (const auto &I : make_range(Begin, End)) {
if (isa<DbgInfoIntrinsic>(I))
continue;
if (--ScanLimit == 0)
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 894680cda1fc..c4795a80ead2 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -762,7 +762,7 @@ static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
return;
}
- for (auto &AccGroupListOp : AccGroups->operands()) {
+ for (const auto &AccGroupListOp : AccGroups->operands()) {
auto *Item = cast<MDNode>(AccGroupListOp.get());
assert(isValidAsAccessGroup(Item) && "List item must be an access group");
List.insert(Item);
@@ -1497,7 +1497,7 @@ void VFABI::getVectorVariantNames(
SmallVector<StringRef, 8> ListAttr;
S.split(ListAttr, ",");
- for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
+ for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 1613e7e42a0a..c5ab35d94860 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -260,6 +260,9 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
return false;
if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true))
return false;
+ if (!verifyScalarEntry(KernelMap, ".uses_dynamic_stack", false,
+ msgpack::Type::Boolean))
+ return false;
if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true))
return false;
if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true))
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 8d5a2555f9af..1d6c21bd66d1 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5510,6 +5510,61 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (!OperandBundles.empty())
UpgradeOperandBundles(OperandBundles);
+ if (auto *IA = dyn_cast<InlineAsm>(Callee)) {
+ InlineAsm::ConstraintInfoVector ConstraintInfo = IA->ParseConstraints();
+ auto IsLabelConstraint = [](const InlineAsm::ConstraintInfo &CI) {
+ return CI.Type == InlineAsm::isLabel;
+ };
+ if (none_of(ConstraintInfo, IsLabelConstraint)) {
+ // Upgrade explicit blockaddress arguments to label constraints.
+ // Verify that the last arguments are blockaddress arguments that
+ // match the indirect destinations. Clang always generates callbr
+ // in this form. We could support reordering with more effort.
+ unsigned FirstBlockArg = Args.size() - IndirectDests.size();
+ for (unsigned ArgNo = FirstBlockArg; ArgNo < Args.size(); ++ArgNo) {
+ unsigned LabelNo = ArgNo - FirstBlockArg;
+ auto *BA = dyn_cast<BlockAddress>(Args[ArgNo]);
+ if (!BA || BA->getFunction() != F ||
+ LabelNo > IndirectDests.size() ||
+ BA->getBasicBlock() != IndirectDests[LabelNo])
+ return error("callbr argument does not match indirect dest");
+ }
+
+ // Remove blockaddress arguments.
+ Args.erase(Args.begin() + FirstBlockArg, Args.end());
+ ArgTyIDs.erase(ArgTyIDs.begin() + FirstBlockArg, ArgTyIDs.end());
+
+ // Recreate the function type with fewer arguments.
+ SmallVector<Type *> ArgTys;
+ for (Value *Arg : Args)
+ ArgTys.push_back(Arg->getType());
+ FTy =
+ FunctionType::get(FTy->getReturnType(), ArgTys, FTy->isVarArg());
+
+ // Update constraint string to use label constraints.
+ std::string Constraints = IA->getConstraintString();
+ unsigned ArgNo = 0;
+ size_t Pos = 0;
+ for (const auto &CI : ConstraintInfo) {
+ if (CI.hasArg()) {
+ if (ArgNo >= FirstBlockArg)
+ Constraints.insert(Pos, "!");
+ ++ArgNo;
+ }
+
+ // Go to next constraint in string.
+ Pos = Constraints.find(',', Pos);
+ if (Pos == std::string::npos)
+ break;
+ ++Pos;
+ }
+
+ Callee = InlineAsm::get(FTy, IA->getAsmString(), Constraints,
+ IA->hasSideEffects(), IA->isAlignStack(),
+ IA->getDialect(), IA->canThrow());
+ }
+ }
+
I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args,
OperandBundles);
ResTypeID = getContainedTypeID(FTyID);
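
The constraint-string edit is the subtle part of the callbr upgrade: a '!' is inserted in front of each constraint that consumed one of the trailing blockaddress arguments, turning it into a label constraint. Below is a standalone sketch of just that string edit; the constraint text and argument indices are invented, and unlike the real code it treats every comma-separated piece as an argument constraint rather than walking ParseConstraints().

#include <cstdio>
#include <string>

int main() {
  std::string Constraints = "r,r,X"; // invented example
  unsigned FirstBlockArg = 2;        // the last argument was a blockaddress
  unsigned ArgNo = 0;
  size_t Pos = 0;
  while (true) {
    // Prefix constraints bound to trailing blockaddress args with '!'.
    if (ArgNo >= FirstBlockArg)
      Constraints.insert(Pos, "!");
    ++ArgNo;
    // Advance to the start of the next constraint.
    Pos = Constraints.find(',', Pos);
    if (Pos == std::string::npos)
      break;
    ++Pos;
  }
  std::printf("%s\n", Constraints.c_str()); // prints "r,r,!X"
}
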
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 0d57ae4ef9df..13d53a35084d 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1226,10 +1226,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
- MetadataList.assignValue(
- LocalAsMetadata::get(ValueList.getValueFwdRef(
- Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)),
- NextMetadataNo);
+ Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID,
+ /*ConstExprInsertBB*/ nullptr);
+ if (!V)
+ return error("Invalid value reference from old fn metadata");
+
+ MetadataList.assignValue(LocalAsMetadata::get(V), NextMetadataNo);
NextMetadataNo++;
break;
}
@@ -1248,8 +1250,11 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Ty->isMetadataTy())
Elts.push_back(getMD(Record[i + 1]));
else if (!Ty->isVoidTy()) {
- auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef(
- Record[i + 1], Ty, TyID, /*ConstExprInsertBB*/ nullptr));
+ Value *V = ValueList.getValueFwdRef(Record[i + 1], Ty, TyID,
+ /*ConstExprInsertBB*/ nullptr);
+ if (!V)
+ return error("Invalid value reference from old metadata");
+ auto *MD = ValueAsMetadata::get(V);
assert(isa<ConstantAsMetadata>(MD) &&
"Expected non-function-local metadata");
Elts.push_back(MD);
@@ -1269,10 +1274,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Ty->isMetadataTy() || Ty->isVoidTy())
return error("Invalid record");
- MetadataList.assignValue(
- ValueAsMetadata::get(ValueList.getValueFwdRef(
- Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)),
- NextMetadataNo);
+ Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID,
+ /*ConstExprInsertBB*/ nullptr);
+ if (!V)
+ return error("Invalid value reference from metadata");
+
+ MetadataList.assignValue(ValueAsMetadata::get(V), NextMetadataNo);
NextMetadataNo++;
break;
}
diff --git a/llvm/lib/Bitcode/Reader/ValueList.h b/llvm/lib/Bitcode/Reader/ValueList.h
index 995d46f01f75..a5b3f6e20707 100644
--- a/llvm/lib/Bitcode/Reader/ValueList.h
+++ b/llvm/lib/Bitcode/Reader/ValueList.h
@@ -21,7 +21,6 @@
namespace llvm {
-class Constant;
class Error;
class Type;
class Value;
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 590562ce2796..d7e012fb6a9e 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4104,8 +4104,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
for (const GlobalAlias &A : M.aliases()) {
auto *Aliasee = A.getAliaseeObject();
- if (!Aliasee->hasName())
- // Nameless function don't have an entry in the summary, skip it.
+ // Skip ifunc and nameless functions which don't have an entry in the
+ // summary.
+ if (!Aliasee->hasName() || isa<GlobalIFunc>(Aliasee))
continue;
auto AliasId = VE.getValueID(&A);
auto AliaseeId = VE.getValueID(Aliasee);
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 727ec2e02cc2..998f629aaa4e 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -802,7 +802,7 @@ void ValueEnumerator::organizeMetadata() {
// - by function, then
// - by isa<MDString>
// and then sort by the original/current ID. Since the IDs are guaranteed to
- // be unique, the result of std::sort will be deterministic. There's no need
+ // be unique, the result of llvm::sort will be deterministic. There's no need
// for std::stable_sort.
llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index b10d79f4b5a6..9526bf7610b4 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -245,8 +245,8 @@ public:
void AccelTableWriter::emitHashes() const {
uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
unsigned BucketIdx = 0;
- for (auto &Bucket : Contents.getBuckets()) {
- for (auto &Hash : Bucket) {
+ for (const auto &Bucket : Contents.getBuckets()) {
+ for (const auto &Hash : Bucket) {
uint32_t HashValue = Hash->HashValue;
if (SkipIdenticalHashes && PrevHash == HashValue)
continue;
@@ -327,7 +327,7 @@ void AppleAccelTableWriter::emitData() const {
const auto &Buckets = Contents.getBuckets();
for (const AccelTableBase::HashList &Bucket : Buckets) {
uint64_t PrevHash = std::numeric_limits<uint64_t>::max();
- for (auto &Hash : Bucket) {
+ for (const auto &Hash : Bucket) {
// Terminate the previous entry if there is no hash collision with the
// current one.
if (PrevHash != std::numeric_limits<uint64_t>::max() &&
@@ -667,12 +667,12 @@ void AccelTableBase::print(raw_ostream &OS) const {
}
OS << "Buckets and Hashes: \n";
- for (auto &Bucket : Buckets)
- for (auto &Hash : Bucket)
+ for (const auto &Bucket : Buckets)
+ for (const auto &Hash : Bucket)
Hash->print(OS);
OS << "Data: \n";
- for (auto &E : Entries)
+ for (const auto &E : Entries)
E.second.print(OS);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 94612a51d2e1..e0050a47a6f6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -488,7 +488,7 @@ bool AsmPrinter::doInitialization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
- for (auto &I : *MI)
+ for (const auto &I : *MI)
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
MP->beginAssembly(M, *MI, *this);
@@ -1731,7 +1731,7 @@ static unsigned getNumGlobalVariableUses(const Constant *C) {
return 1;
unsigned NumUses = 0;
- for (auto *CU : C->users())
+ for (const auto *CU : C->users())
NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
return NumUses;
@@ -1754,7 +1754,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
// To be a got equivalent, at least one of its users need to be a constant
// expression used by another global variable.
- for (auto *U : GV->users())
+ for (const auto *U : GV->users())
NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
return NumGOTEquivUsers > 0;
@@ -1797,7 +1797,7 @@ void AsmPrinter::emitGlobalGOTEquivs() {
}
GlobalGOTEquivs.clear();
- for (auto *GV : FailedCandidates)
+ for (const auto *GV : FailedCandidates)
emitGlobalVariable(GV);
}
@@ -2731,6 +2731,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// to represent relocations on supported targets. Expressions involving only
// constant addresses are constant folded instead.
switch (CE->getOpcode()) {
+ default:
+ break; // Error
case Instruction::AddrSpaceCast: {
const Constant *Op = CE->getOperand(0);
unsigned DstAS = CE->getType()->getPointerAddressSpace();
@@ -2738,24 +2740,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
if (TM.isNoopAddrSpaceCast(SrcAS, DstAS))
return lowerConstant(Op);
- // Fallthrough to error.
- LLVM_FALLTHROUGH;
- }
- default: {
- // If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using DataLayout as a
- // last resort before giving up.
- Constant *C = ConstantFoldConstant(CE, getDataLayout());
- if (C != CE)
- return lowerConstant(C);
-
- // Otherwise report the problem to the user.
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(Twine(OS.str()));
+ break; // Error
}
case Instruction::GetElementPtr: {
// Generate a symbolic expression for the byte address
@@ -2860,6 +2845,21 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
}
}
+
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ Constant *C = ConstantFoldConstant(CE, getDataLayout());
+ if (C != CE)
+ return lowerConstant(C);
+
+ // Otherwise report the problem to the user.
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction().getParent());
+ report_fatal_error(Twine(OS.str()));
}
static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
@@ -3359,9 +3359,12 @@ void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV,
}
if (!AliasList)
return;
- for (const auto &AliasPair : *AliasList)
- report_fatal_error("Aliases with offset " + Twine(AliasPair.first) +
- " were not emitted.");
+ // TODO: These remaining aliases are not emitted in the correct location. Need
+ // to handle the case where the alias offset doesn't refer to any sub-element.
+ for (auto &AliasPair : *AliasList) {
+ for (const GlobalAlias *GA : AliasPair.second)
+ OutStreamer->emitLabel(getSymbol(GA));
+ }
}
void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
@@ -3717,7 +3720,7 @@ void AsmPrinter::emitStackMaps(StackMaps &SM) {
// No GC strategy, use the default format.
NeedsDefault = true;
else
- for (auto &I : *MI) {
+ for (const auto &I : *MI) {
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
if (MP->emitStackMaps(SM, *this))
continue;
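The lowerConstant hunks above move the last-chance ConstantFoldConstant call and the fatal-error report out of the switch's default case and place them once after the switch, so both the default case and the unsupported-addrspacecast path reach them with a plain break. A minimal control-flow sketch of that shape, with invented names:

#include <cstdio>
#include <cstdlib>

enum class Op { Supported, NoopCast, Unsupported };

// Stand-in for the last-chance constant-folding attempt.
static int tryFold(Op O) { return O == Op::NoopCast ? 3 : 0; }

static int lowerSketch(Op O, bool NoopCastIsFree) {
  switch (O) {
  default:
    break; // Error: handled by the shared fallback below.
  case Op::Supported:
    return 1; // lowered directly
  case Op::NoopCast:
    if (NoopCastIsFree)
      return 2; // lowered by looking through the cast
    break; // Error: also handled by the shared fallback below.
  }

  // Shared fallback: try a last-chance fold, otherwise report and give up.
  if (int Folded = tryFold(O))
    return Folded;
  std::fprintf(stderr, "unsupported expression in static initializer\n");
  std::abort();
}

int main() {
  return lowerSketch(Op::NoopCast, /*NoopCastIsFree=*/false) == 3 ? 0 : 1;
}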
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 719fec06aa33..bfa53f5b9374 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -309,7 +309,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
// Emit the DIE children if any.
if (Die.hasChildren()) {
- for (auto &Child : Die.children())
+ for (const auto &Child : Die.children())
emitDwarfDIE(Child);
OutStreamer->AddComment("End Of Children Mark");
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 5da50d7aab9f..1d546e5fd72e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -374,7 +374,7 @@ void DIEHash::computeHash(const DIE &Die) {
addAttributes(Die);
// Then hash each of the children of the DIE.
- for (auto &C : Die.children()) {
+ for (const auto &C : Die.children()) {
// 7.27 Step 7
// If C is a nested type entry or a member function entry, ...
if (isType(C.getTag()) || (C.getTag() == dwarf::DW_TAG_subprogram && isType(C.getParent()->getTag()))) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 1358f4d25990..dabbfb45f687 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -340,11 +340,11 @@ static void clobberRegEntries(InlinedEntity Var, unsigned RegNo,
if (Entry.getInstr()->hasDebugOperandForReg(RegNo)) {
IndicesToErase.push_back(Index);
Entry.endEntry(ClobberIndex);
- for (auto &MO : Entry.getInstr()->debug_operands())
+ for (const auto &MO : Entry.getInstr()->debug_operands())
if (MO.isReg() && MO.getReg() && MO.getReg() != RegNo)
MaybeRemovedRegisters.insert(MO.getReg());
} else {
- for (auto &MO : Entry.getInstr()->debug_operands())
+ for (const auto &MO : Entry.getInstr()->debug_operands())
if (MO.isReg() && MO.getReg())
KeepRegisters.insert(MO.getReg());
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 660a064687d3..8ebbed974abb 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -304,7 +304,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
// Mark all non-overlapping initial fragments.
- for (auto I = Entries.begin(); I != Entries.end(); ++I) {
+ for (const auto *I = Entries.begin(); I != Entries.end(); ++I) {
if (!I->isDbgValue())
continue;
const DIExpression *Fragment = I->getInstr()->getDebugExpression();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index b3f99d346faa..b26960cdebb8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -848,7 +848,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
Optional<unsigned> NVPTXAddressSpace;
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- for (auto &Fragment : DV.getFrameIndexExprs()) {
+ for (const auto &Fragment : DV.getFrameIndexExprs()) {
Register FrameReg;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
@@ -970,7 +970,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
SmallDenseSet<DbgVariable *, 8> Visiting;
// Initialize the worklist and the DIVariable lookup table.
- for (auto Var : reverse(Input)) {
+ for (auto *Var : reverse(Input)) {
DbgVar.insert({Var->getVariable(), Var});
WorkList.push_back({Var, 0});
}
@@ -1005,7 +1005,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
// Push dependencies and this node onto the worklist, so that this node is
// visited again after all of its dependencies are handled.
WorkList.push_back({Var, 1});
- for (auto *Dependency : dependencies(Var)) {
+ for (const auto *Dependency : dependencies(Var)) {
// Don't add dependency if it is in a different lexical scope or a global.
if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency))
if (DbgVariable *Var = DbgVar.lookup(Dep))
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 866338a949f3..54af14429907 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -819,7 +819,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
}
// Do not emit CSInfo for undef forwarding registers.
- for (auto &MO : CallMI->uses())
+ for (const auto &MO : CallMI->uses())
if (MO.isReg() && MO.isUndef())
ForwardedRegWorklist.erase(MO.getReg());
@@ -2235,7 +2235,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
#endif
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
- auto *SP = cast<DISubprogram>(AScope->getScopeNode());
+ const auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
continue;
@@ -2527,7 +2527,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
using Encoding = DWARFExpression::Operation::Encoding;
uint64_t Offset = 0;
- for (auto &Op : Expr) {
+ for (const auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index ad9dc517539a..f21c1bf4e914 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -187,7 +187,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
AtomicInsts.push_back(&I);
bool MadeChange = false;
- for (auto I : AtomicInsts) {
+ for (auto *I : AtomicInsts) {
auto LI = dyn_cast<LoadInst>(I);
auto SI = dyn_cast<StoreInst>(I);
auto RMWI = dyn_cast<AtomicRMWInst>(I);
@@ -1371,7 +1371,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Look for any users of the cmpxchg that are just comparing the loaded value
// against the desired one, and replace them with the CFG-derived version.
SmallVector<ExtractValueInst *, 2> PrunedInsts;
- for (auto User : CI->users()) {
+ for (auto *User : CI->users()) {
ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
if (!EV)
continue;
@@ -1388,7 +1388,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
}
// We can remove the instructions now we're no longer iterating through them.
- for (auto EV : PrunedInsts)
+ for (auto *EV : PrunedInsts)
EV->eraseFromParent();
if (!CI->use_empty()) {
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index f05f5b9f9947..958212a0e448 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -268,8 +268,8 @@ void llvm::sortBasicBlocksAndUpdateBranches(
// If the exception section begins with a landing pad, that landing pad will
// assume a zero offset (relative to @LPStart) in the LSDA. However, a value of
// zero implies "no landing pad." This function inserts a NOP just before the EH
-// pad label to ensure a nonzero offset. Returns true if padding is not needed.
-static bool avoidZeroOffsetLandingPad(MachineFunction &MF) {
+// pad label to ensure a nonzero offset.
+void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) {
for (auto &MBB : MF) {
if (MBB.isBeginSection() && MBB.isEHPad()) {
MachineBasicBlock::iterator MI = MBB.begin();
@@ -278,10 +278,8 @@ static bool avoidZeroOffsetLandingPad(MachineFunction &MF) {
MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop();
BuildMI(MBB, MI, DebugLoc(),
MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode()));
- return false;
}
}
- return true;
}
// This checks if the source of this function has drifted since this binary was
@@ -297,7 +295,7 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) {
auto *Existing = MF.getFunction().getMetadata(LLVMContext::MD_annotation);
if (Existing) {
MDTuple *Tuple = cast<MDTuple>(Existing);
- for (auto &N : Tuple->operands())
+ for (const auto &N : Tuple->operands())
if (cast<MDString>(N.get())->getString() == MetadataName)
return true;
}
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 689e49978d43..519b24c21d7a 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -121,7 +121,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
assert(MI && "Dead valno in interval");
}
- if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis()))
+ if (!TII.isTriviallyReMaterializable(*MI))
return false;
}
return true;
@@ -279,7 +279,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
MRI.clearSimpleHint(LI.reg());
std::set<Register> HintedRegs;
- for (auto &Hint : CopyHints) {
+ for (const auto &Hint : CopyHints) {
if (!HintedRegs.insert(Hint.Reg).second ||
(TargetHint.first != 0 && Hint.Reg == TargetHint.second))
// Don't add the same reg twice or the target-type hint again.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6778af22f532..b6c762b93ca5 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -730,7 +730,7 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
// (Repeatedly) merging blocks into their predecessors can create redundant
// debug intrinsics.
- for (auto &Pred : Preds)
+ for (const auto &Pred : Preds)
if (auto *BB = cast_or_null<BasicBlock>(Pred))
RemoveRedundantDbgInstrs(BB);
@@ -3684,7 +3684,7 @@ private:
// Phi we added (subject to match) and both of them is in the same basic
// block then we can match our pair if values match. So we state that
// these values match and add it to work list to verify that.
- for (auto B : Item.first->blocks()) {
+ for (auto *B : Item.first->blocks()) {
Value *FirstValue = Item.first->getIncomingValueForBlock(B);
Value *SecondValue = Item.second->getIncomingValueForBlock(B);
if (FirstValue == SecondValue)
@@ -5227,18 +5227,31 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
- if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ if (SunkAddr->getType() != Addr->getType()) {
+ if (SunkAddr->getType()->getPointerAddressSpace() !=
+ Addr->getType()->getPointerAddressSpace() &&
+ !DL->isNonIntegralPointerType(Addr->getType())) {
+ // There are two reasons the address spaces might not match: a no-op
+ // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+ // ptrtoint/inttoptr pair to ensure we match the original semantics.
+ // TODO: allow bitcast between different address space pointers with the
+ // same size.
+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+ SunkAddr =
+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+ } else
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ }
} else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
SubtargetInfo->addrSinkUsingGEPs())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
- Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
Value *ResultPtr = nullptr, *ResultIndex = nullptr;
// First, find the pointer.
@@ -5361,8 +5374,21 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrMode.InBounds);
}
- if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ if (SunkAddr->getType() != Addr->getType()) {
+ if (SunkAddr->getType()->getPointerAddressSpace() !=
+ Addr->getType()->getPointerAddressSpace() &&
+ !DL->isNonIntegralPointerType(Addr->getType())) {
+ // There are two reasons the address spaces might not match: a no-op
+ // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
+ // ptrtoint/inttoptr pair to ensure we match the original semantics.
+ // TODO: allow bitcast between different address space pointers with
+ // the same size.
+ SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
+ SunkAddr =
+ Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
+ } else
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
+ }
}
} else {
// We'd require a ptrtoint/inttoptr down the line, which we can't do for
@@ -7793,9 +7819,11 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
}
// After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
// alive on IndirectBr edges).
- assert(find_if(GEPIOp->users(), [&](User *Usr) {
- return cast<Instruction>(Usr)->getParent() != SrcBlock;
- }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
+ assert(llvm::none_of(GEPIOp->users(),
+ [&](User *Usr) {
+ return cast<Instruction>(Usr)->getParent() != SrcBlock;
+ }) &&
+ "GEPIOp is used outside SrcBlock");
return true;
}
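Both optimizeMemoryInst hunks above apply the same rule when the reused or newly built address does not have the original pointer type: if the address spaces differ and the target pointer type is not non-integral, rebuild a ptrtoint/inttoptr pair so the original semantics are preserved; otherwise a plain pointer cast is enough. A hedged sketch of that rule factored into one helper follows (castSunkAddr is an invented name; the IRBuilder and DataLayout calls are the usual LLVM ones).

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"

static llvm::Value *castSunkAddr(llvm::IRBuilder<> &Builder,
                                 const llvm::DataLayout &DL,
                                 llvm::Value *SunkAddr, llvm::Type *AddrTy) {
  if (SunkAddr->getType() == AddrTy)
    return SunkAddr;
  if (SunkAddr->getType()->getPointerAddressSpace() !=
          AddrTy->getPointerAddressSpace() &&
      !DL.isNonIntegralPointerType(AddrTy)) {
    // Mismatched address spaces can come from a no-op addrspacecast or from a
    // ptrtoint/inttoptr pair; rebuild the ptrtoint/inttoptr pair so the
    // original semantics are kept.
    llvm::Type *IntPtrTy = DL.getIntPtrType(AddrTy);
    llvm::Value *AsInt = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
    return Builder.CreateIntToPtr(AsInt, AddrTy, "sunkaddr");
  }
  // Same address space (or a non-integral pointer): a plain cast suffices.
  return Builder.CreatePointerCast(SunkAddr, AddrTy);
}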
diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index 42192f41dbda..34fb1d286a58 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -239,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
});
if (ResourceAvail && shouldAddToPacket(MI)) {
// Dependency check for MI with instructions in CurrentPacketMIs.
- for (auto MJ : CurrentPacketMIs) {
+ for (auto *MJ : CurrentPacketMIs) {
SUnit *SUJ = MIToSUnit[MJ];
assert(SUJ && "Missing SUnit Info!");
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 32858d043383..c108f0088d43 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -576,7 +576,7 @@ static bool hasSameValue(const MachineRegisterInfo &MRI,
// If the instruction could modify memory, or there may be some intervening
// store between the two, we can't consider them to be equal.
- if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr))
+ if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad())
return false;
// We also can't guarantee that they are the same if, for example, the
@@ -808,7 +808,7 @@ void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
// TBB and FBB should not dominate any blocks.
// Tail children should be transferred to Head.
MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
- for (auto B : Removed) {
+ for (auto *B : Removed) {
MachineDomTreeNode *Node = DomTree->getNode(B);
assert(Node != HeadNode && "Cannot erase the head node");
while (Node->getNumChildren()) {
@@ -826,7 +826,7 @@ void updateLoops(MachineLoopInfo *Loops,
return;
// If-conversion doesn't change loop structure, and it doesn't mess with back
// edges, so updating LoopInfo is simply removing the dead blocks.
- for (auto B : Removed)
+ for (auto *B : Removed)
Loops->removeBlock(B);
}
} // namespace
@@ -1065,7 +1065,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
// if-conversion in a single pass. The tryConvertIf() function may erase
// blocks, but only blocks dominated by the head block. This makes it safe to
// update the dominator tree while the post-order iterator is still active.
- for (auto DomNode : post_order(DomTree))
+ for (auto *DomNode : post_order(DomTree))
if (tryConvertIf(DomNode->getBlock()))
Changed = true;
@@ -1198,7 +1198,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
// if-conversion in a single pass. The tryConvertIf() function may erase
// blocks, but only blocks dominated by the head block. This makes it safe to
// update the dominator tree while the post-order iterator is still active.
- for (auto DomNode : post_order(DomTree))
+ for (auto *DomNode : post_order(DomTree))
if (tryConvertIf(DomNode->getBlock()))
Changed = true;
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 59932a542bbc..db4d42bf3ca4 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -82,8 +83,11 @@ STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
- auto *ConstVec = dyn_cast<ConstantVector>(MaskVal);
- return ConstVec && ConstVec->isAllOnesValue();
+ if (Value *SplattedVal = getSplatValue(MaskVal))
+ if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+ return ConstValue->isAllOnesValue();
+
+ return false;
}
/// \returns A non-excepting divisor constant for this type.
@@ -171,6 +175,10 @@ struct CachingVPExpander {
Value *expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &PI);
+ /// \brief Lower this VP memory operation to a non-VP intrinsic.
+ Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
+ VPIntrinsic &VPI);
+
/// \brief Query TTI and expand the vector predication in \p P accordingly.
Value *expandPredication(VPIntrinsic &PI);
@@ -389,6 +397,71 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
return Reduction;
}
+Value *
+CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
+ VPIntrinsic &VPI) {
+ assert(VPI.canIgnoreVectorLengthParam());
+
+ const auto &DL = F.getParent()->getDataLayout();
+
+ Value *MaskParam = VPI.getMaskParam();
+ Value *PtrParam = VPI.getMemoryPointerParam();
+ Value *DataParam = VPI.getMemoryDataParam();
+ bool IsUnmasked = isAllTrueMask(MaskParam);
+
+ MaybeAlign AlignOpt = VPI.getPointerAlignment();
+
+ Value *NewMemoryInst = nullptr;
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Not a VP memory intrinsic");
+ case Intrinsic::vp_store:
+ if (IsUnmasked) {
+ StoreInst *NewStore =
+ Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
+ if (AlignOpt.has_value())
+ NewStore->setAlignment(AlignOpt.value());
+ NewMemoryInst = NewStore;
+ } else
+ NewMemoryInst = Builder.CreateMaskedStore(
+ DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);
+
+ break;
+ case Intrinsic::vp_load:
+ if (IsUnmasked) {
+ LoadInst *NewLoad =
+ Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
+ if (AlignOpt.has_value())
+ NewLoad->setAlignment(AlignOpt.value());
+ NewMemoryInst = NewLoad;
+ } else
+ NewMemoryInst = Builder.CreateMaskedLoad(
+ VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);
+
+ break;
+ case Intrinsic::vp_scatter: {
+ auto *ElementType =
+ cast<VectorType>(DataParam->getType())->getElementType();
+ NewMemoryInst = Builder.CreateMaskedScatter(
+ DataParam, PtrParam,
+ AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
+ break;
+ }
+ case Intrinsic::vp_gather: {
+ auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
+ NewMemoryInst = Builder.CreateMaskedGather(
+ VPI.getType(), PtrParam,
+ AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr,
+ VPI.getName());
+ break;
+ }
+ }
+
+ assert(NewMemoryInst);
+ replaceOperation(*NewMemoryInst, VPI);
+ return NewMemoryInst;
+}
+
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
@@ -465,6 +538,16 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
return expandPredicationInReduction(Builder, *VPRI);
+ switch (VPI.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store:
+ case Intrinsic::vp_gather:
+ case Intrinsic::vp_scatter:
+ return expandPredicationInMemoryIntrinsic(Builder, VPI);
+ }
+
return &VPI;
}
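The expansion above relies on two facts that canIgnoreVectorLengthParam() and isAllTrueMask() establish: once the effective vector length covers every lane and the mask is all true, a vp.load or vp.store touches exactly the same bytes as an ordinary contiguous access, so it can be emitted as a plain (or, with an unknown mask, a masked) load/store. An illustrative scalar reference model of the vp.load semantics, not LLVM code:

#include <cstddef>
#include <vector>

// Illustrative scalar model of llvm.vp.load: lanes at or past the explicit
// vector length, or with a false mask bit, are not accessed at all.
std::vector<float> vp_load_ref(const float *Ptr, const std::vector<bool> &Mask,
                               std::size_t EVL, std::size_t VL) {
  std::vector<float> Result(VL, 0.0f); // 0.0f stands in for "undefined"
  for (std::size_t I = 0; I < VL; ++I)
    if (I < EVL && Mask[I])
      Result[I] = Ptr[I];
  return Result;
}
// With EVL == VL (canIgnoreVectorLengthParam) and an all-true mask every lane
// is read contiguously, which is why the expansion can fall back to a plain
// aligned load; with only the mask unknown it becomes a masked load instead.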
diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp
index 3ec666227651..3f8fe2402d65 100644
--- a/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/llvm/lib/CodeGen/FaultMaps.cpp
@@ -85,7 +85,7 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
OS.emitInt32(0); // Reserved
- for (auto &Fault : FFI) {
+ for (const auto &Fault : FFI) {
LLVM_DEBUG(dbgs() << WFMP << " fault type: "
<< faultTypeToString(Fault.Kind) << "\n");
OS.emitInt32(Fault.Kind);
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index ac140e745600..6a0d1c33d3e3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -319,7 +319,7 @@ const GISelInstProfileBuilder &
GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const {
addNodeIDMBB(MI->getParent());
addNodeIDOpcode(MI->getOpcode());
- for (auto &Op : MI->operands())
+ for (const auto &Op : MI->operands())
addNodeIDMachineOperand(Op);
addNodeIDFlag(MI->getFlags());
return *this;
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b06043fb4c31..6c36c6445c65 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -116,7 +116,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
// we'll pass to the assigner function.
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
- for (auto &Arg : CB.args()) {
+ for (const auto &Arg : CB.args()) {
ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i),
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
@@ -960,7 +960,7 @@ bool CallLowering::parametersInCSRMatch(
const SmallVectorImpl<CCValAssign> &OutLocs,
const SmallVectorImpl<ArgInfo> &OutArgs) const {
for (unsigned i = 0; i < OutLocs.size(); ++i) {
- auto &ArgLoc = OutLocs[i];
+ const auto &ArgLoc = OutLocs[i];
// If it's not a register, it's fine.
if (!ArgLoc.isRegLoc())
continue;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ad0c0c8315dc..da054b9c14fb 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2385,7 +2385,7 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
// loading from. To be safe, let's just assume that all loads and stores
// are different (unless we have something which is guaranteed to not
// change.)
- if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
+ if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
return false;
// If both instructions are loads or stores, they are equal only if both
@@ -2396,7 +2396,7 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
if (!LS1 || !LS2)
return false;
- if (!I2->isDereferenceableInvariantLoad(nullptr) ||
+ if (!I2->isDereferenceableInvariantLoad() ||
(LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
return false;
}
@@ -4800,24 +4800,22 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
auto BuildUDIVPattern = [&](const Constant *C) {
auto *CI = cast<ConstantInt>(C);
const APInt &Divisor = CI->getValue();
- UnsignedDivisonByConstantInfo magics =
- UnsignedDivisonByConstantInfo::get(Divisor);
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor);
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
- if (magics.IsAdd != 0 && !Divisor[0]) {
+ if (magics.IsAdd && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
magics =
- UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
- assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+ UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(!magics.IsAdd && "Should use cheap fixup now");
}
- APInt Magic = magics.Magic;
-
unsigned SelNPQ;
- if (magics.IsAdd == 0 || Divisor.isOneValue()) {
+ if (!magics.IsAdd || Divisor.isOneValue()) {
assert(magics.ShiftAmount < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
PostShift = magics.ShiftAmount;
@@ -4829,7 +4827,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
PreShifts.push_back(
MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
- MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, magics.Magic).getReg(0));
NPQFactors.push_back(
MIB.buildConstant(ScalarTy,
SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
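buildUDivUsingMul above turns an unsigned division by a constant into a multiply by a precomputed magic number plus shifts; when the divisor is even and the magic would otherwise need the expensive add fixup (magics.IsAdd), the dividend is pre-shifted by the divisor's trailing zeros and the magic is recomputed for the shifted divisor. A self-contained 32-bit example of that pre-shift trick for dividing by 6, using the well-known magic for 3 (the constant is illustrative, not taken from the pass):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Instead of a magic multiply for 6 that needs the add fixup, divide out the
// even factor first (countTrailingZeros(6) == 1) and use the cheap magic for
// the shifted, odd divisor 3. 0xAAAAAAAB == ceil(2^33 / 3).
static uint32_t udiv6(uint32_t N) {
  uint32_t PreShifted = N >> 1; // divide out the even factor up front
  return (uint32_t)(((uint64_t)PreShifted * 0xAAAAAAABu) >> 33);
}

int main() {
  for (uint32_t N : {0u, 1u, 5u, 6u, 7u, 12345u, 0xFFFFFFFFu})
    assert(udiv6(N) == N / 6);
  return 0;
}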
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 947facc87b71..dbdcfe0b6f0b 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -166,8 +167,10 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None) {
AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ }
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<TargetLibraryInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
@@ -684,7 +687,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
CaseClusterVector Clusters;
Clusters.reserve(SI.getNumCases());
- for (auto &I : SI.cases()) {
+ for (const auto &I : SI.cases()) {
MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
assert(Succ && "Could not find successor mbb in mapping");
const ConstantInt *CaseVal = I.getCaseValue();
@@ -1275,26 +1278,41 @@ static bool isSwiftError(const Value *V) {
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
- if (DL->getTypeStoreSize(LI.getType()) == 0)
+
+ unsigned StoreSize = DL->getTypeStoreSize(LI.getType());
+ if (StoreSize == 0)
return true;
ArrayRef<Register> Regs = getOrCreateVRegs(LI);
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
Register Base = getOrCreateVReg(*LI.getPointerOperand());
+ AAMDNodes AAInfo = LI.getAAMetadata();
- Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
+ const Value *Ptr = LI.getPointerOperand();
+ Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType());
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
- if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
+ if (CLI->supportSwiftError() && isSwiftError(Ptr)) {
assert(Regs.size() == 1 && "swifterror should be single pointer");
- Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
- LI.getPointerOperand());
+ Register VReg =
+ SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), Ptr);
MIRBuilder.buildCopy(Regs[0], VReg);
return true;
}
auto &TLI = *MF->getSubtarget().getTargetLowering();
MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
+ if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
+ if (AA->pointsToConstantMemory(
+ MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
+ Flags |= MachineMemOperand::MOInvariant;
+
+ // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
+ // but the previous usage implied it did. Probably should check
+ // isDereferenceableAndAlignedPointer.
+ Flags |= MachineMemOperand::MODereferenceable;
+ }
+ }
const MDNode *Ranges =
Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
@@ -1306,7 +1324,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
Align BaseAlign = getMemOpAlign(LI);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, MRI->getType(Regs[i]),
- commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges,
+ commonAlignment(BaseAlign, Offsets[i] / 8), AAInfo, Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -1400,7 +1418,7 @@ bool IRTranslator::translateInsertValue(const User &U,
ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
- auto InsertedIt = InsertedRegs.begin();
+ auto *InsertedIt = InsertedRegs.begin();
for (unsigned i = 0; i < DstRegs.size(); ++i) {
if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
@@ -1563,9 +1581,9 @@ bool IRTranslator::translateGetElementPtr(const User &U,
bool IRTranslator::translateMemFunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
unsigned Opcode) {
-
+ const Value *SrcPtr = CI.getArgOperand(1);
// If the source is undef, then just emit a nop.
- if (isa<UndefValue>(CI.getArgOperand(1)))
+ if (isa<UndefValue>(SrcPtr))
return true;
SmallVector<Register, 3> SrcRegs;
@@ -1595,15 +1613,20 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
unsigned IsVol =
cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();
+ ConstantInt *CopySize = nullptr;
+
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
SrcAlign = MCI->getSourceAlign().valueOrOne();
+ CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2));
} else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
SrcAlign = MCI->getSourceAlign().valueOrOne();
+ CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2));
} else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
DstAlign = MMI->getDestAlign().valueOrOne();
SrcAlign = MMI->getSourceAlign().valueOrOne();
+ CopySize = dyn_cast<ConstantInt>(MMI->getArgOperand(2));
} else {
auto *MSI = cast<MemSetInst>(&CI);
DstAlign = MSI->getDestAlign().valueOrOne();
@@ -1617,14 +1640,31 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
}
// Create mem operands to store the alignment and volatile info.
- auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
- ICall.addMemOperand(MF->getMachineMemOperand(
- MachinePointerInfo(CI.getArgOperand(0)),
- MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
+ MachineMemOperand::Flags LoadFlags = MachineMemOperand::MOLoad;
+ MachineMemOperand::Flags StoreFlags = MachineMemOperand::MOStore;
+ if (IsVol) {
+ LoadFlags |= MachineMemOperand::MOVolatile;
+ StoreFlags |= MachineMemOperand::MOVolatile;
+ }
+
+ AAMDNodes AAInfo = CI.getAAMetadata();
+ if (AA && CopySize &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ SrcPtr, LocationSize::precise(CopySize->getZExtValue()), AAInfo))) {
+ LoadFlags |= MachineMemOperand::MOInvariant;
+
+ // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
+ // but the previous usage implied it did. Probably should check
+ // isDereferenceableAndAlignedPointer.
+ LoadFlags |= MachineMemOperand::MODereferenceable;
+ }
+
+ ICall.addMemOperand(
+ MF->getMachineMemOperand(MachinePointerInfo(CI.getArgOperand(0)),
+ StoreFlags, 1, DstAlign, AAInfo));
if (Opcode != TargetOpcode::G_MEMSET)
ICall.addMemOperand(MF->getMachineMemOperand(
- MachinePointerInfo(CI.getArgOperand(1)),
- MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
+ MachinePointerInfo(SrcPtr), LoadFlags, 1, SrcAlign, AAInfo));
return true;
}
@@ -1785,7 +1825,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
// Yes. Let's translate it.
SmallVector<llvm::SrcOp, 4> VRegs;
- for (auto &Arg : CI.args())
+ for (const auto &Arg : CI.args())
VRegs.push_back(getOrCreateVReg(*Arg));
MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
@@ -2305,7 +2345,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
SmallVector<ArrayRef<Register>, 8> Args;
Register SwiftInVReg = 0;
Register SwiftErrorVReg = 0;
- for (auto &Arg : CB.args()) {
+ for (const auto &Arg : CB.args()) {
if (CLI->supportSwiftError() && isSwiftError(Arg)) {
assert(SwiftInVReg == 0 && "Expected only one swift error argument");
LLT Ty = getLLTForType(*Arg->getType(), *DL);
@@ -2394,7 +2434,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
- for (auto &Arg : enumerate(CI.args())) {
+ for (const auto &Arg : enumerate(CI.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
@@ -2947,7 +2987,7 @@ void IRTranslator::finishPendingPhis() {
for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
auto IRPred = PI->getIncomingBlock(i);
ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
- for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
+ for (auto *Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
continue;
SeenPreds.insert(Pred);
@@ -3347,10 +3387,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
TM.resetTargetOptions(F);
EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
FuncInfo.MF = MF;
- if (EnableOpts)
+ if (EnableOpts) {
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- else
+ } else {
+ AA = nullptr;
FuncInfo.BPI = nullptr;
+ }
FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
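Both translateLoad and translateMemFunc above consult alias analysis the same way: if AA proves the accessed bytes are constant memory, the machine memory operand is marked MOInvariant (and, per the FIXME, MODereferenceable). A hedged sketch of that flag derivation as a standalone helper (the helper name is invented; AAResults, MemoryLocation and MachineMemOperand are the standard APIs):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include <cstdint>

// Only add the flags when AA proves the accessed bytes are constant memory.
// Adding MODereferenceable mirrors the FIXME in the patch, not a guarantee
// made by pointsToConstantMemory itself.
static llvm::MachineMemOperand::Flags
addConstantMemFlags(llvm::MachineMemOperand::Flags Flags, llvm::AAResults *AA,
                    const llvm::Value *Ptr, uint64_t Size,
                    const llvm::AAMDNodes &AAInfo) {
  if (!AA || (Flags & llvm::MachineMemOperand::MOInvariant))
    return Flags;
  llvm::MemoryLocation Loc(Ptr, llvm::LocationSize::precise(Size), AAInfo);
  if (AA->pointsToConstantMemory(Loc))
    Flags |= llvm::MachineMemOperand::MOInvariant |
             llvm::MachineMemOperand::MODereferenceable;
  return Flags;
}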
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 95ae8383b6fa..e0357c50e555 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -332,6 +332,8 @@ bool InlineAsmLowering::lowerInlineAsm(
}
++ResNo;
} else {
+ assert(OpInfo.Type != InlineAsm::isLabel &&
+ "GlobalISel currently doesn't support callbr");
OpInfo.ConstraintVT = MVT::Other;
}
@@ -427,7 +429,8 @@ bool InlineAsmLowering::lowerInlineAsm(
}
break;
- case InlineAsm::isInput: {
+ case InlineAsm::isInput:
+ case InlineAsm::isLabel: {
if (OpInfo.isMatchingInputConstraint()) {
unsigned DefIdx = OpInfo.getMatchedOperand();
// Find operand with register def that corresponds to DefIdx.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index fb046d519ac8..52ee13757f27 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2393,30 +2393,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_FCONSTANT: {
+ // To avoid changing the bits of the constant due to extension to a larger
+ // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
MachineOperand &SrcMO = MI.getOperand(1);
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- APFloat Val = SrcMO.getFPImm()->getValueAPF();
- bool LosesInfo;
- switch (WideTy.getSizeInBits()) {
- case 32:
- Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
- &LosesInfo);
- break;
- case 64:
- Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
- &LosesInfo);
- break;
- default:
- return UnableToLegalize;
- }
-
- assert(!LosesInfo && "extend should always be lossless");
-
- Observer.changingInstr(MI);
- SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
-
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
- Observer.changedInstr(MI);
+ APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
+ widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
+ MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_IMPLICIT_DEF: {
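The new G_FCONSTANT widening above treats the constant as its raw bit pattern (bitcastToAPInt), materializes it as an integer G_CONSTANT in the wide type, and truncates back, instead of converting the APFloat to a wider format and relying on G_FPTRUNC. A tiny standalone illustration (not LLVM code) of why handling the bits as an integer is exactly lossless:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float F = 0.1f; // not exactly representable in binary floating point
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);        // bitcastToAPInt() analogue
  uint64_t Widened = Bits;                    // G_CONSTANT in the wider type
  uint32_t Truncated = (uint32_t)Widened;     // G_TRUNC back down
  std::printf("%08x %08x\n", (unsigned)Bits, (unsigned)Truncated);
  return Bits == Truncated ? 0 : 1;           // identical bit patterns
}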
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index d4fbf7d15089..be1bc865d1e1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -298,7 +298,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
const auto &LegalSizes = LegalStoreSizes[AS];
#ifndef NDEBUG
- for (auto StoreMI : StoresToMerge)
+ for (auto *StoreMI : StoresToMerge)
assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
#endif
@@ -366,7 +366,7 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
// directly. Otherwise, we need to generate some instructions to merge the
// existing values together into a wider type.
SmallVector<APInt, 8> ConstantVals;
- for (auto Store : Stores) {
+ for (auto *Store : Stores) {
auto MaybeCst =
getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
if (!MaybeCst) {
@@ -415,7 +415,7 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
return R;
});
- for (auto MI : Stores)
+ for (auto *MI : Stores)
InstsToErase.insert(MI);
return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 0d9580e25606..2e22dae35e5a 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -646,7 +646,7 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
SmallVector<SrcOp> TmpVec;
TmpVec.reserve(Ops.size());
LLT EltTy = Res.getLLTTy(*getMRI()).getElementType();
- for (auto &Op : Ops)
+ for (const auto &Op : Ops)
TmpVec.push_back(buildConstant(EltTy, Op));
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 7781761bc131..013c8700e8ae 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -228,7 +228,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
return false;
// Instructions without side-effects are dead iff they only define dead vregs.
- for (auto &MO : MI.operands()) {
+ for (const auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 67d6a3df7807..258ad1931b12 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -332,7 +332,7 @@ void HardwareLoop::Create() {
// Run through the basic blocks of the loop and see if any of them have dead
// PHIs that can be removed.
- for (auto I : L->blocks())
+ for (auto *I : L->blocks())
DeleteDeadPHIs(I);
}
@@ -407,13 +407,13 @@ Value *HardwareLoop::InitLoopCount() {
BasicBlock *Predecessor = BB->getSinglePredecessor();
// If it's not safe to create a while loop then don't force it and create a
// do-while loop instead
- if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
+ if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator()))
UseLoopGuard = false;
else
BB = Predecessor;
}
- if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
+ if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) {
LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
<< *ExitCount << "\n");
return nullptr;
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index fc97938ccd3e..da6ec76bd770 100644
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -758,7 +758,7 @@ void ImplicitNullChecks::rewriteNullChecks(
ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) {
DebugLoc DL;
- for (auto &NC : NullCheckList) {
+ for (const auto &NC : NullCheckList) {
// Remove the conditional branch dependent on the null check.
unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock());
(void)BranchesRemoved;
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 06c660807c5c..3ea1d6c7f1ef 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -86,7 +86,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate {
MachineFunction &MF;
LiveIntervals &LIS;
LiveStacks &LSS;
- AliasAnalysis *AA;
MachineDominatorTree &MDT;
MachineLoopInfo &Loops;
VirtRegMap &VRM;
@@ -140,7 +139,6 @@ public:
VirtRegMap &vrm)
: MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
- AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
@@ -159,7 +157,6 @@ class InlineSpiller : public Spiller {
MachineFunction &MF;
LiveIntervals &LIS;
LiveStacks &LSS;
- AliasAnalysis *AA;
MachineDominatorTree &MDT;
MachineLoopInfo &Loops;
VirtRegMap &VRM;
@@ -200,7 +197,6 @@ public:
VirtRegAuxInfo &VRAI)
: MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()),
LSS(Pass.getAnalysis<LiveStacks>()),
- AA(&Pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(Pass.getAnalysis<MachineDominatorTree>()),
Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM),
MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
@@ -659,7 +655,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
- if (!Edit->anyRematerializable(AA))
+ if (!Edit->anyRematerializable())
return;
UsedValues.clear();
@@ -702,7 +698,7 @@ void InlineSpiller::reMaterializeAll() {
if (DeadDefs.empty())
return;
LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
- Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
// LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions
// after rematerialization. To remove a VNI for a vreg from its LiveInterval,
@@ -1180,7 +1176,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
- Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
}
// Finally delete the SnippetCopies.
@@ -1298,7 +1294,7 @@ void HoistSpillHelper::rmRedundantSpills(
// For each spill saw, check SpillBBToSpill[] and see if its BB already has
// another spill inside. If a BB contains more than one spill, only keep the
// earlier spill with smaller SlotIndex.
- for (const auto CurrentSpill : Spills) {
+ for (auto *const CurrentSpill : Spills) {
MachineBasicBlock *Block = CurrentSpill->getParent();
MachineDomTreeNode *Node = MDT.getBase().getNode(Block);
MachineInstr *PrevSpill = SpillBBToSpill[Node];
@@ -1313,7 +1309,7 @@ void HoistSpillHelper::rmRedundantSpills(
SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill;
}
}
- for (const auto SpillToRm : SpillsToRm)
+ for (auto *const SpillToRm : SpillsToRm)
Spills.erase(SpillToRm);
}
@@ -1347,7 +1343,7 @@ void HoistSpillHelper::getVisitOrders(
// the path starting from the first node with non-redundant spill to the Root
// node will be added to the WorkSet, which will contain all the possible
// locations where spills may be hoisted to after the loop below is done.
- for (const auto Spill : Spills) {
+ for (auto *const Spill : Spills) {
MachineBasicBlock *Block = Spill->getParent();
MachineDomTreeNode *Node = MDT[Block];
MachineInstr *SpillToRm = nullptr;
@@ -1492,7 +1488,7 @@ void HoistSpillHelper::runHoistSpills(
: BranchProbability(1, 1);
if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
// Hoist: Move spills to current Block.
- for (const auto SpillBB : SpillsInSubTree) {
+ for (auto *const SpillBB : SpillsInSubTree) {
// When SpillBB is a BB contains original spill, insert the spill
// to SpillsToRm.
if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
@@ -1609,7 +1605,7 @@ void HoistSpillHelper::hoistAllSpills() {
// Remove redundant spills or change them to dead instructions.
NumSpills -= SpillsToRm.size();
- for (auto const RMEnt : SpillsToRm) {
+ for (auto *const RMEnt : SpillsToRm) {
RMEnt->setDesc(TII.get(TargetOpcode::KILL));
for (unsigned i = RMEnt->getNumOperands(); i; --i) {
MachineOperand &MO = RMEnt->getOperand(i - 1);
@@ -1617,7 +1613,7 @@ void HoistSpillHelper::hoistAllSpills() {
RMEnt->removeOperand(i - 1);
}
}
- Edit.eliminateDeadDefs(SpillsToRm, None, AA);
+ Edit.eliminateDeadDefs(SpillsToRm, None);
}
}
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 55f3ad796291..0582378be4cd 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -541,7 +541,7 @@ bool InterleavedAccess::runOnFunction(Function &F) {
Changed |= lowerInterleavedStore(SI, DeadInsts);
}
- for (auto I : DeadInsts)
+ for (auto *I : DeadInsts)
I->eraseFromParent();
return Changed;
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 43858071025a..a0f304659bca 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -528,8 +528,8 @@ public:
if (B.size() != o.B.size())
return false;
- auto ob = o.B.begin();
- for (auto &b : B) {
+ auto *ob = o.B.begin();
+ for (const auto &b : B) {
if (b != *ob)
return false;
ob++;
@@ -1154,7 +1154,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// Test if all participating instruction will be dead after the
// transformation. If intermediate results are used, no performance gain can
// be expected. Also sum the cost of the Instructions being left dead.
- for (auto &I : Is) {
+ for (const auto &I : Is) {
// Compute the old cost
InstructionCost += TTI.getInstructionCost(I, CostKind);
@@ -1182,7 +1182,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// that the corresponding defining access dominates first LI. This guarantees
// that there are no aliasing stores in between the loads.
auto FMA = MSSA.getMemoryAccess(First);
- for (auto LI : LIs) {
+ for (auto *LI : LIs) {
auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess();
if (!MSSA.dominates(MADef, FMA))
return false;
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 43c12c67939e..ef49d3888f2b 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -284,7 +284,7 @@ public:
// Initialized the preferred-location map with illegal locations, to be
// filled in later.
- for (auto &VLoc : VLocs)
+ for (const auto &VLoc : VLocs)
if (VLoc.second.Kind == DbgValue::Def)
ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()});
@@ -507,7 +507,7 @@ public:
// date. Wipe old tracking data for the location if it's been clobbered in
// the meantime.
if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {
- for (auto &P : ActiveMLocs[NewLoc]) {
+ for (const auto &P : ActiveMLocs[NewLoc]) {
ActiveVLocs.erase(P);
}
ActiveMLocs[NewLoc.asU64()].clear();
@@ -560,7 +560,7 @@ public:
// explicitly undef, then stop here.
if (!NewLoc && !MakeUndef) {
// Try and recover a few more locations with entry values.
- for (auto &Var : ActiveMLocIt->second) {
+ for (const auto &Var : ActiveMLocIt->second) {
auto &Prop = ActiveVLocs.find(Var)->second.Properties;
recoverAsEntryValue(Var, Prop, OldValue);
}
@@ -570,7 +570,7 @@ public:
// Examine all the variables based on this location.
DenseSet<DebugVariable> NewMLocs;
- for (auto &Var : ActiveMLocIt->second) {
+ for (const auto &Var : ActiveMLocIt->second) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
// Re-state the variable location: if there's no replacement then NewLoc
// is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE
@@ -623,7 +623,7 @@ public:
VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
// For each variable based on Src; create a location at Dst.
- for (auto &Var : MovingVars) {
+ for (const auto &Var : MovingVars) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
assert(ActiveVLocIt != ActiveVLocs.end());
ActiveVLocIt->second.Loc = Dst;
@@ -1224,7 +1224,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
// FIXME: no index for this?
Register Reg = MTracker->LocIdxToLocID[L];
const TargetRegisterClass *TRC = nullptr;
- for (auto *TRCI : TRI->regclasses())
+ for (const auto *TRCI : TRI->regclasses())
if (TRCI->contains(Reg))
TRC = TRCI;
assert(TRC && "Couldn't find target register class?");
@@ -1454,7 +1454,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
for (uint32_t DeadReg : DeadRegs)
MTracker->defReg(DeadReg, CurBB, CurInst);
- for (auto *MO : RegMaskPtrs)
+ for (const auto *MO : RegMaskPtrs)
MTracker->writeRegMask(MO, CurBB, CurInst);
// If this instruction writes to a spill slot, def that slot.
@@ -1493,7 +1493,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
if (IgnoreSPAlias(Reg))
continue;
- for (auto *MO : RegMaskPtrs)
+ for (const auto *MO : RegMaskPtrs)
if (MO->clobbersPhysReg(Reg))
TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
}
@@ -1822,7 +1822,7 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
// Otherwise, examine all other seen fragments for this variable, with "this"
// fragment being a previously unseen fragment. Record any pair of
// overlapping fragments.
- for (auto &ASeenFragment : AllSeenFragments) {
+ for (const auto &ASeenFragment : AllSeenFragments) {
// Does this previously seen fragment overlap?
if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
// Yes: Mark the current fragment as being overlapped.
@@ -1993,7 +1993,7 @@ bool InstrRefBasedLDV::mlocJoin(
// redundant PHI that we can eliminate.
SmallVector<const MachineBasicBlock *, 8> BlockOrders;
- for (auto Pred : MBB.predecessors())
+ for (auto *Pred : MBB.predecessors())
BlockOrders.push_back(Pred);
// Visit predecessors in RPOT order.
@@ -2313,7 +2313,7 @@ void InstrRefBasedLDV::buildMLocValueMap(
// All successors should be visited: put any back-edges on the pending
// list for the next pass-through, and any other successors to be
// visited this pass, if they're not going to be already.
- for (auto s : MBB->successors()) {
+ for (auto *s : MBB->successors()) {
// Does branching to this successor represent a back-edge?
if (BBToOrder[s] > BBToOrder[MBB]) {
// No: visit it during this dataflow iteration.
@@ -2367,7 +2367,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
if (BlockOrders.empty())
return None;
- for (auto p : BlockOrders) {
+ for (const auto *p : BlockOrders) {
unsigned ThisBBNum = p->getNumber();
auto OutValIt = LiveOuts.find(p);
if (OutValIt == LiveOuts.end())
@@ -2422,7 +2422,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
// Check that all properties are the same. We can't pick a location if they're
// not.
const DbgValueProperties *Properties0 = Properties[0];
- for (auto *Prop : Properties)
+ for (const auto *Prop : Properties)
if (*Prop != *Properties0)
return None;
@@ -2472,7 +2472,7 @@ bool InstrRefBasedLDV::vlocJoin(
SmallVector<InValueT, 8> Values;
bool Bail = false;
int BackEdgesStart = 0;
- for (auto p : BlockOrders) {
+ for (auto *p : BlockOrders) {
// If the predecessor isn't in scope / to be explored, we'll never be
// able to join any locations.
if (!BlocksToExplore.contains(p)) {
@@ -2577,7 +2577,7 @@ void InstrRefBasedLDV::getBlocksForScope(
// instructions in scope at all. To accurately replicate VarLoc
// LiveDebugValues, this means exploring all artificial successors too.
// Perform a depth-first-search to enumerate those blocks.
- for (auto *MBB : BlocksToExplore) {
+ for (const auto *MBB : BlocksToExplore) {
// Depth-first-search state: each node is a block and which successor
// we're currently exploring.
SmallVector<std::pair<const MachineBasicBlock *,
@@ -2662,7 +2662,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB));
  // Pick out relevant blocks in RPO order and sort them.
- for (auto *MBB : BlocksToExplore)
+ for (const auto *MBB : BlocksToExplore)
BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
llvm::sort(BlockOrders, Cmp);
@@ -2696,7 +2696,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
  // between blocks. This keeps the locality of working on one lexical scope at
  // a time, but avoids re-processing variable values because some other
// variable has been assigned.
- for (auto &Var : VarsWeCareAbout) {
+ for (const auto &Var : VarsWeCareAbout) {
// Re-initialize live-ins and live-outs, to clear the remains of previous
// variables live-ins / live-outs.
for (unsigned int I = 0; I < NumBlocks; ++I) {
@@ -2823,7 +2823,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
// We should visit all successors. Ensure we'll visit any non-backedge
// successors during this dataflow iteration; book backedge successors
// to be visited next time around.
- for (auto s : MBB->successors()) {
+ for (auto *s : MBB->successors()) {
// Ignore out of scope / not-to-be-explored successors.
if (LiveInIdx.find(s) == LiveInIdx.end())
continue;
@@ -2906,7 +2906,7 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition(
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void InstrRefBasedLDV::dump_mloc_transfer(
const MLocTransferMap &mloc_transfer) const {
- for (auto &P : mloc_transfer) {
+ for (const auto &P : mloc_transfer) {
std::string foo = MTracker->LocIdxToName(P.first);
std::string bar = MTracker->IDAsString(P.second);
dbgs() << "Loc " << foo << " --> " << bar << "\n";
@@ -2993,7 +2993,7 @@ void InstrRefBasedLDV::makeDepthFirstEjectionMap(
if (DILocationIt != ScopeToDILocation.end()) {
getBlocksForScope(DILocationIt->second, BlocksToExplore,
ScopeToAssignBlocks.find(WS)->second);
- for (auto *MBB : BlocksToExplore) {
+ for (const auto *MBB : BlocksToExplore) {
unsigned BBNum = MBB->getNumber();
if (EjectionMap[BBNum] == 0)
EjectionMap[BBNum] = WS->getDFSOut();
@@ -3100,7 +3100,7 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
getBlocksForScope(DILocationIt->second, BlocksToExplore,
ScopeToAssignBlocks.find(WS)->second);
- for (auto *MBB : BlocksToExplore)
+ for (const auto *MBB : BlocksToExplore)
if (WS->getDFSOut() == EjectionMap[MBB->getNumber()])
EjectBlock(const_cast<MachineBasicBlock &>(*MBB));
@@ -3709,10 +3709,9 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
for (auto &PHI : CreatedPHIs)
SortedPHIs.push_back(PHI);
- std::sort(
- SortedPHIs.begin(), SortedPHIs.end(), [&](LDVSSAPhi *A, LDVSSAPhi *B) {
- return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB];
- });
+ llvm::sort(SortedPHIs, [&](LDVSSAPhi *A, LDVSSAPhi *B) {
+ return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB];
+ });
for (auto &PHI : SortedPHIs) {
ValueIDNum ThisBlockValueNum =
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 24c00b8a10ec..32e07eb77efe 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -1874,7 +1874,7 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,
// Otherwise, examine all other seen fragments for this variable, with "this"
// fragment being a previously unseen fragment. Record any pair of
// overlapping fragments.
- for (auto &ASeenFragment : AllSeenFragments) {
+ for (const auto &ASeenFragment : AllSeenFragments) {
// Does this previously seen fragment overlap?
if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
// Yes: Mark the current fragment as being overlapped.
@@ -1922,7 +1922,7 @@ bool VarLocBasedLDV::join(
// For all predecessors of this MBB, find the set of VarLocs that
// can be joined.
int NumVisited = 0;
- for (auto p : MBB.predecessors()) {
+ for (auto *p : MBB.predecessors()) {
// Ignore backedges if we have not visited the predecessor yet. As the
// predecessor hasn't yet had locations propagated into it, most locations
// will not yet be valid, so treat them as all being uninitialized and
@@ -2246,7 +2246,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
if (OLChanged) {
OLChanged = false;
- for (auto s : MBB->successors())
+ for (auto *s : MBB->successors())
if (OnPending.insert(s).second) {
Pending.push(BBToOrder[s]);
}
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 35cf25330186..574c0f98161e 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -1891,7 +1891,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
// insert position, insert all instructions at the same SlotIdx. They are
// guaranteed to appear in-sequence in StashedDebugInstrs because we insert
// them in order.
- for (auto StashIt = StashedDebugInstrs.begin();
+ for (auto *StashIt = StashedDebugInstrs.begin();
StashIt != StashedDebugInstrs.end(); ++StashIt) {
SlotIndex Idx = StashIt->Idx;
MachineBasicBlock *MBB = StashIt->MBB;
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 1242ce20b732..8a76048bb8c4 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveVariables.h"
@@ -60,9 +59,8 @@ using namespace llvm;
char LiveIntervals::ID = 0;
char &llvm::LiveIntervalsID = LiveIntervals::ID;
-INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
- "Live Interval Analysis", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
@@ -87,8 +85,6 @@ cl::opt<bool> UseSegmentSetForPhysRegs(
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addRequiredTransitiveID(MachineDominatorsID);
@@ -126,7 +122,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MRI = &MF->getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
@@ -1417,7 +1412,7 @@ private:
NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI);
OldIdxVNI->def = NewIdxDef;
// Modify subsequent segments to be defined by the moved def OldIdxVNI.
- for (auto Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx)
+ for (auto *Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx)
Idx->valno = OldIdxVNI;
// Aggressively remove all dead flags from the former dead definition.
// Kill/dead flags shouldn't be used while live intervals exist; they
@@ -1662,7 +1657,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
ArrayRef<Register> OrigRegs) {
// Find anchor points, which are at the beginning/end of blocks or at
// instructions that already have indexes.
- while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin))
+ while (Begin != MBB->begin() && !Indexes->hasIndex(*std::prev(Begin)))
--Begin;
while (End != MBB->end() && !Indexes->hasIndex(*End))
++End;
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 58eb4110f153..2aafb746aa2c 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -68,17 +68,16 @@ Register LiveRangeEdit::createFrom(Register OldReg) {
}
bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
- const MachineInstr *DefMI,
- AAResults *aa) {
+ const MachineInstr *DefMI) {
assert(DefMI && "Missing instruction");
ScannedRemattable = true;
- if (!TII.isTriviallyReMaterializable(*DefMI, aa))
+ if (!TII.isTriviallyReMaterializable(*DefMI))
return false;
Remattable.insert(VNI);
return true;
}
-void LiveRangeEdit::scanRemattable(AAResults *aa) {
+void LiveRangeEdit::scanRemattable() {
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
@@ -90,14 +89,14 @@ void LiveRangeEdit::scanRemattable(AAResults *aa) {
MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (!DefMI)
continue;
- checkRematerializable(OrigVNI, DefMI, aa);
+ checkRematerializable(OrigVNI, DefMI);
}
ScannedRemattable = true;
}
-bool LiveRangeEdit::anyRematerializable(AAResults *aa) {
+bool LiveRangeEdit::anyRematerializable() {
if (!ScannedRemattable)
- scanRemattable(aa);
+ scanRemattable();
return !Remattable.empty();
}
@@ -274,8 +273,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
}
/// Find all live intervals that need to shrink, then remove the instruction.
-void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
- AAResults *AA) {
+void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
assert(MI->allDefsAreDead() && "Def isn't really dead");
SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
@@ -384,7 +382,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// register uses. That may provoke RA to split an interval at the KILL
// and later result in an invalid live segment end.
if (isOrigDef && DeadRemats && !HasLiveVRegUses &&
- TII.isTriviallyReMaterializable(*MI, AA)) {
+ TII.isTriviallyReMaterializable(*MI)) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
@@ -414,14 +412,13 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
}
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
- ArrayRef<Register> RegsBeingSpilled,
- AAResults *AA) {
+ ArrayRef<Register> RegsBeingSpilled) {
ToShrinkSet ToShrink;
for (;;) {
// Erase all dead defs.
while (!Dead.empty())
- eliminateDeadDef(Dead.pop_back_val(), ToShrink, AA);
+ eliminateDeadDef(Dead.pop_back_val(), ToShrink);
if (ToShrink.empty())
break;
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 94bdfab5e5e0..40250171fe1e 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -758,8 +758,7 @@ void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI,
/// removeVirtualRegistersKilled - Remove all killed info for the specified
/// instruction.
void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isKill()) {
MO.setIsKill(false);
Register Reg = MO.getReg();
diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp
index 984dc452fbfd..a517ee3794ca 100644
--- a/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -78,7 +78,7 @@ bool LowerEmuTLS::runOnModule(Module &M) {
if (G.isThreadLocal())
TlsVars.append({&G});
}
- for (const auto G : TlsVars)
+ for (const auto *const G : TlsVars)
Changed |= addEmuTlsVar(M, G);
return Changed;
}
diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index eea24d8e9353..3e7b4dbc9d71 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -129,7 +129,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
// Calculates the distance of MI from the beginning of its parent BB.
auto getInstrIdx = [](const MachineInstr &MI) {
unsigned i = 0;
- for (auto &CurMI : *MI.getParent()) {
+ for (const auto &CurMI : *MI.getParent()) {
if (&CurMI == &MI)
return i;
i++;
@@ -416,7 +416,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
MachineRegisterInfo &MRI = MF.getRegInfo();
VRegRenamer Renamer(MRI);
- for (auto MBB : RPOList)
+ for (auto *MBB : RPOList)
Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
return Changed;
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 0c94e1f7e474..e3d6b59c5077 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -3383,7 +3383,7 @@ static void initSlots2BasicBlocks(
DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
MST.incorporateFunction(F);
- for (auto &BB : F) {
+ for (const auto &BB : F) {
if (BB.hasName())
continue;
int Slot = MST.getLocalSlot(&BB);
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 4944cb46c5b5..aa9522bc3459 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -437,7 +437,7 @@ void MIRParserImpl::setupDebugValueTracking(
MF.setDebugInstrNumberingCount(MaxInstrNum);
// Load any substitutions.
- for (auto &Sub : YamlMF.DebugValueSubstitutions) {
+ for (const auto &Sub : YamlMF.DebugValueSubstitutions) {
MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp},
{Sub.DstInst, Sub.DstOp}, Sub.Subreg);
}
@@ -975,7 +975,7 @@ bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS,
bool MIRParserImpl::parseMachineMetadataNodes(
PerFunctionMIParsingState &PFS, MachineFunction &MF,
const yaml::MachineFunction &YMF) {
- for (auto &MDS : YMF.MachineMetadataNodes) {
+ for (const auto &MDS : YMF.MachineMetadataNodes) {
if (parseMachineMetadata(PFS, MDS))
return true;
}
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 7daf9025d303..d21d552227cf 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -13,10 +13,9 @@
#include "AllocationOrder.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocGreedy.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#endif
@@ -91,7 +90,6 @@ public:
AU.setPreservesAll();
AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -891,9 +889,7 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
&getAnalysis<RegAllocEvictionAdvisorAnalysis>()))
if (auto *Log = DevModeAnalysis->getLogger(MF))
Log->logFloatFinalReward(static_cast<float>(
- calculateRegAllocScore(
- MF, getAnalysis<MachineBlockFrequencyInfo>(),
- getAnalysis<AAResultsWrapperPass>().getAAResults())
+ calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>())
.getScore()));
return false;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 02c44fa85cd9..7381c7e6b09c 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1436,7 +1436,7 @@ MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
  // distribute the complement of the sum to each unknown probability.
unsigned KnownProbNum = 0;
auto Sum = BranchProbability::getZero();
- for (auto &P : Probs) {
+ for (const auto &P : Probs) {
if (!P.isUnknown()) {
Sum += P;
KnownProbNum++;
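A worked example of what this loop computes (numbers hypothetical): with four successors of which only two probabilities are known, say 1/2 and 1/4, Sum ends up as 3/4 and KnownProbNum as 2, so each of the two unknown successors is presumably assigned an even share of the complement, (1 - 3/4) / 2 = 1/8, keeping the outgoing probabilities summing to 1.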
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 4cc84f22bdde..9ff5c37627b4 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -965,7 +965,7 @@ bool MachineBlockPlacement::isTrellis(
for (MachineBasicBlock *Succ : ViableSuccs) {
int PredCount = 0;
- for (auto SuccPred : Succ->predecessors()) {
+ for (auto *SuccPred : Succ->predecessors()) {
// Allow triangle successors, but don't count them.
if (Successors.count(SuccPred)) {
// Make sure that it is actually a triangle.
@@ -1063,7 +1063,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
// Collect the edge frequencies of all edges that form the trellis.
SmallVector<WeightedEdge, 8> Edges[2];
int SuccIndex = 0;
- for (auto Succ : ViableSuccs) {
+ for (auto *Succ : ViableSuccs) {
for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
// Skip any placed predecessors that are not BB
if (SuccPred != BB)
@@ -2451,7 +2451,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// as the sum of frequencies of exit edges we collect here, excluding the exit
// edge from the tail of the loop chain.
SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
- for (auto BB : LoopChain) {
+ for (auto *BB : LoopChain) {
auto LargestExitEdgeProb = BranchProbability::getZero();
for (auto *Succ : BB->successors()) {
BlockChain *SuccChain = BlockToChain[Succ];
@@ -2561,7 +2561,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
// profile data is available.
if (F->getFunction().hasProfileData() || ForceLoopColdBlock) {
BlockFrequency LoopFreq(0);
- for (auto LoopPred : L.getHeader()->predecessors())
+ for (auto *LoopPred : L.getHeader()->predecessors())
if (!L.contains(LoopPred))
LoopFreq += MBFI->getBlockFreq(LoopPred) *
MBPI->getEdgeProbability(LoopPred, L.getHeader());
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index e60fd9f7883a..c6756b1d3737 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -415,7 +415,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
- if (!MI->isDereferenceableInvariantLoad(AA))
+ if (!MI->isDereferenceableInvariantLoad())
// FIXME: we should be able to hoist loads with no other side effects if
// there are no other instructions which can change memory in this loop.
// This is a trivial form of alias analysis.
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 722a709af240..57e2cd20bdd0 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -92,6 +92,7 @@ private:
bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
bool combineInstructions(MachineBasicBlock *);
MachineInstr *getOperandDef(const MachineOperand &MO);
+ bool isTransientMI(const MachineInstr *MI);
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineTraceMetrics::Trace BlockTrace);
@@ -158,6 +159,43 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
return DefInstr;
}
+/// Return true if MI is unlikely to generate an actual target instruction.
+bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
+ if (!MI->isCopy())
+ return MI->isTransient();
+
+ // If MI is a COPY, check if its src and dst registers can be coalesced.
+ Register Dst = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+
+ if (!MI->isFullCopy()) {
+ // If src RC contains super registers of dst RC, it can also be coalesced.
+ if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
+ return false;
+
+ auto SrcSub = MI->getOperand(1).getSubReg();
+ auto SrcRC = MRI->getRegClass(Src);
+ auto DstRC = MRI->getRegClass(Dst);
+ return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr;
+ }
+
+ if (Src.isPhysical() && Dst.isPhysical())
+ return Src == Dst;
+
+ if (Src.isVirtual() && Dst.isVirtual()) {
+ auto SrcRC = MRI->getRegClass(Src);
+ auto DstRC = MRI->getRegClass(Dst);
+ return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
+ }
+
+ if (Src.isVirtual())
+ std::swap(Src, Dst);
+
+ // Now Src is physical register, Dst is virtual register.
+ auto DstRC = MRI->getRegClass(Dst);
+ return DstRC->contains(Src);
+}
+
/// Computes depth of instructions in vector \InsInstr.
///
/// \param InsInstrs is a vector of machine instructions
@@ -204,9 +242,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
MachineInstr *DefInstr = getOperandDef(MO);
if (DefInstr) {
DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
- LatencyOp = TSchedModel.computeOperandLatency(
- DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
- InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ if (!isTransientMI(DefInstr))
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
}
}
IDepth = std::max(IDepth, DepthOp + LatencyOp);
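The two hunks above work together: isTransientMI() treats a COPY that the register allocator is expected to coalesce away as free, and getDepth() then skips the operand latency of such defs. A minimal standalone sketch of that accounting (toy types and hypothetical latencies, not LLVM API):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct ToyInstr {
      int Latency;            // cycles, hypothetical
      bool Transient;         // e.g. a COPY expected to be coalesced away
      std::vector<int> Deps;  // indices of defining instructions
    };

    // Depth = longest chain of operand latencies feeding instruction I,
    // ignoring the latency of transient defs (mirrors the getDepth change).
    static int depth(const std::vector<ToyInstr> &Chain, int I) {
      int D = 0;
      for (int Dep : Chain[I].Deps) {
        int LatencyOp = Chain[Dep].Transient ? 0 : Chain[Dep].Latency;
        D = std::max(D, depth(Chain, Dep) + LatencyOp);
      }
      return D;
    }

    int main() {
      // mul (3 cy) -> copy (coalescable) -> add (1 cy)
      std::vector<ToyInstr> Chain = {
          {3, false, {}},   // 0: mul
          {1, true, {0}},   // 1: copy of mul's result
          {1, false, {1}},  // 2: add using the copy
      };
      std::printf("depth(add) = %d\n", depth(Chain, 2)); // prints 3, not 4
    }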
@@ -305,7 +344,7 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
unsigned RootLatency = 0;
- for (auto I : DelInstrs)
+ for (auto *I : DelInstrs)
RootLatency += TSchedModel.computeInstrLatency(I);
return {NewRootLatency, RootLatency};
@@ -488,7 +527,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
for (auto *InstrPtr : DelInstrs) {
InstrPtr->eraseFromParent();
// Erase all LiveRegs defined by the removed instruction
- for (auto I = RegUnits.begin(); I != RegUnits.end(); ) {
+ for (auto *I = RegUnits.begin(); I != RegUnits.end();) {
if (I->MI == InstrPtr)
I = RegUnits.erase(I);
else
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index ca5936a14779..f0190812389f 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -127,7 +127,7 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
BV.set(*CSR);
// Saved CSRs are not pristine.
- for (auto &I : getCalleeSavedInfo())
+ for (const auto &I : getCalleeSavedInfo())
for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
BV.reset(*S);
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index f58996ea90c6..6b481a374382 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -911,8 +911,8 @@ static const MachineInstr *getCallInstr(const MachineInstr *MI) {
if (!MI->isBundle())
return MI;
- for (auto &BMI : make_range(getBundleStart(MI->getIterator()),
- getBundleEnd(MI->getIterator())))
+ for (const auto &BMI : make_range(getBundleStart(MI->getIterator()),
+ getBundleEnd(MI->getIterator())))
if (BMI.isCandidateForCallSiteEntry())
return &BMI;
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 867a7ed584b2..3e1aace855a5 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -146,7 +146,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
return X.getSectionID().Type < Y.getSectionID().Type;
};
llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
-
+ llvm::avoidZeroOffsetLandingPad(MF);
return true;
}
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 31f45e194a97..e92dec5bea48 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1203,7 +1203,7 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
// destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (mayLoad() && !isDereferenceableInvariantLoad(AA))
+ if (mayLoad() && !isDereferenceableInvariantLoad())
// Otherwise, this is a real load. If there is a store between the load and
// end of block, we can't move it.
return !SawStore;
@@ -1348,7 +1348,7 @@ bool MachineInstr::hasOrderedMemoryRef() const {
/// isDereferenceableInvariantLoad - Return true if this instruction will never
/// trap and is loading from a location whose value is invariant across a run of
/// this function.
-bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const {
+bool MachineInstr::isDereferenceableInvariantLoad() const {
// If the instruction doesn't load at all, it isn't an invariant load.
if (!mayLoad())
return false;
@@ -1374,12 +1374,6 @@ bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const {
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
if (PSV->isConstant(&MFI))
continue;
- } else if (const Value *V = MMO->getValue()) {
- // If we have an AliasAnalysis, ask it whether the memory is constant.
- if (AA &&
- AA->pointsToConstantMemory(
- MemoryLocation(V, MMO->getSize(), MMO->getAAInfo())))
- continue;
}
// Otherwise assume conservatively.
@@ -2273,7 +2267,7 @@ using MMOList = SmallVector<const MachineMemOperand *, 2>;
static unsigned getSpillSlotSize(const MMOList &Accesses,
const MachineFrameInfo &MFI) {
unsigned Size = 0;
- for (auto A : Accesses)
+ for (const auto *A : Accesses)
if (MFI.isSpillSlotObjectIndex(
cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
->getFrameIndex()))
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 00d75f8231c7..df7b6c782b91 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,8 +230,7 @@ namespace {
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
- bool isTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const;
+ bool isTriviallyReMaterializable(const MachineInstr &MI) const;
void EnterScope(MachineBasicBlock *MBB);
@@ -666,9 +665,9 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
/// virtual register uses. Even though rematerializable, RA might not actually
/// rematerialize it in this scenario. In that case we do not want to hoist such
/// an instruction out of the loop in the belief that RA will sink it back if needed.
-bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const {
- if (!TII->isTriviallyReMaterializable(MI, AA))
+bool MachineLICMBase::isTriviallyReMaterializable(
+ const MachineInstr &MI) const {
+ if (!TII->isTriviallyReMaterializable(MI))
return false;
for (const MachineOperand &MO : MI.operands()) {
@@ -1174,7 +1173,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// Rematerializable instructions should always be hoisted providing the
// register allocator can just pull them down again when needed.
- if (isTriviallyReMaterializable(MI, AA))
+ if (isTriviallyReMaterializable(MI))
return true;
// FIXME: If there are long latency loop-invariant instructions inside the
@@ -1227,8 +1226,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
- if (!isTriviallyReMaterializable(MI, AA) &&
- !MI.isDereferenceableInvariantLoad(AA)) {
+ if (!isTriviallyReMaterializable(MI) &&
+ !MI.isDereferenceableInvariantLoad()) {
LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
}
@@ -1247,7 +1246,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
- if (!MI->isDereferenceableInvariantLoad(AA))
+ if (!MI->isDereferenceableInvariantLoad())
return nullptr;
// Next determine the register class for a temporary register.
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 8d500398f55e..52501ca7c871 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -219,7 +219,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
RegClassInfo.runOnMachineFunction(*MF);
- for (auto &L : *MLI)
+ for (const auto &L : *MLI)
scheduleLoop(*L);
return false;
@@ -231,7 +231,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
/// the loop.
bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
bool Changed = false;
- for (auto &InnerLoop : L)
+ for (const auto &InnerLoop : L)
Changed |= scheduleLoop(*InnerLoop);
#ifndef NDEBUG
@@ -689,7 +689,7 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
Worklist.push_back(SUa);
while (!Worklist.empty()) {
const SUnit *SU = Worklist.pop_back_val();
- for (auto &SI : SU->Succs) {
+ for (const auto &SI : SU->Succs) {
SUnit *SuccSU = SI.getSUnit();
if (SI.getKind() == SDep::Order) {
if (Visited.count(SuccSU))
@@ -706,11 +706,11 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
/// Return true if the instruction causes a chain between memory
/// references before and after it.
-static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
+static bool isDependenceBarrier(MachineInstr &MI) {
return MI.isCall() || MI.mayRaiseFPException() ||
MI.hasUnmodeledSideEffects() ||
(MI.hasOrderedMemoryRef() &&
- (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
+ (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
}
/// Return the underlying objects for the memory references of an instruction.
@@ -743,14 +743,14 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
for (auto &SU : SUnits) {
MachineInstr &MI = *SU.getInstr();
- if (isDependenceBarrier(MI, AA))
+ if (isDependenceBarrier(MI))
PendingLoads.clear();
else if (MI.mayLoad()) {
SmallVector<const Value *, 4> Objs;
::getUnderlyingObjects(&MI, Objs);
if (Objs.empty())
Objs.push_back(UnknownValue);
- for (auto V : Objs) {
+ for (const auto *V : Objs) {
SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
SUs.push_back(&SU);
}
@@ -759,12 +759,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
::getUnderlyingObjects(&MI, Objs);
if (Objs.empty())
Objs.push_back(UnknownValue);
- for (auto V : Objs) {
+ for (const auto *V : Objs) {
MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
PendingLoads.find(V);
if (I == PendingLoads.end())
continue;
- for (auto Load : I->second) {
+ for (auto *Load : I->second) {
if (isSuccOrder(Load, &SU))
continue;
MachineInstr &LdMI = *Load->getInstr();
@@ -1407,8 +1407,8 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
// Add the artificial dependencies if it does not form a cycle.
- for (auto I : UseSUs) {
- for (auto Src : SrcSUs) {
+ for (auto *I : UseSUs) {
+ for (auto *Src : SrcSUs) {
if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
Src->addPred(SDep(I, SDep::Artificial));
SDAG->Topo.AddPred(Src, I);
@@ -1878,7 +1878,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
Order = TopDown;
LLVM_DEBUG(dbgs() << " Top down (intersect) ");
} else if (NodeSets.size() == 1) {
- for (auto &N : Nodes)
+ for (const auto &N : Nodes)
if (N->Succs.size() == 0)
R.insert(N);
Order = BottomUp;
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 5f80445a5a34..96131dc2983e 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1698,7 +1698,7 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
<< ", Width: " << Width << "\n");
}
#ifndef NDEBUG
- for (auto *Op : BaseOps)
+ for (const auto *Op : BaseOps)
assert(Op);
#endif
}
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 006ba9273dfb..0568bc6a4600 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -446,7 +446,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MadeChange |= ProcessBlock(MBB);
// If we have anything we marked as toSplit, split it now.
- for (auto &Pair : ToSplit) {
+ for (const auto &Pair : ToSplit) {
auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
if (NewSucc != nullptr) {
LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index a85dbf1de1ee..b546a5082b07 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -200,7 +200,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
SmallVector<stable_hash> HashComponents;
// TODO: Hash more stuff like block alignment and branch probabilities.
- for (auto &MI : MBB)
+ for (const auto &MI : MBB)
HashComponents.push_back(stableHashValue(MI));
return stable_hash_combine_range(HashComponents.begin(),
HashComponents.end());
@@ -209,7 +209,7 @@ stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
stable_hash llvm::stableHashValue(const MachineFunction &MF) {
SmallVector<stable_hash> HashComponents;
// TODO: Hash lots more stuff like function alignment and stack objects.
- for (auto &MBB : MF)
+ for (const auto &MBB : MF)
HashComponents.push_back(stableHashValue(MBB));
return stable_hash_combine_range(HashComponents.begin(),
HashComponents.end());
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 0a5ff276fedc..715e5da26989 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -484,7 +484,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
// Run an upwards post-order search for the trace start.
Bounds.Downward = false;
Bounds.Visited.clear();
- for (auto I : inverse_post_order_ext(MBB, Bounds)) {
+ for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the predecessors have been visited, pick the preferred one.
@@ -502,7 +502,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
// Run a downwards post-order search for the trace end.
Bounds.Downward = true;
Bounds.Visited.clear();
- for (auto I : post_order_ext(MBB, Bounds)) {
+ for (const auto *I : post_order_ext(MBB, Bounds)) {
LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the successors have been visited, pick the preferred one.
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 7a008bae726e..93e68918b632 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -2802,8 +2802,8 @@ void MachineVerifier::visitMachineFunctionAfter() {
// tracking numbers.
if (MF->getFunction().getSubprogram()) {
DenseSet<unsigned> SeenNumbers;
- for (auto &MBB : *MF) {
- for (auto &MI : MBB) {
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
if (auto Num = MI.peekDebugInstrNum()) {
auto Result = SeenNumbers.insert((unsigned)Num);
if (!Result.second)
diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index ec383b9b1c65..51de99b81057 100644
--- a/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -1395,7 +1395,7 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
// Finally, add the set of defs to each block in the iterated dominance
// frontier.
- for (auto DB : IDF) {
+ for (auto *DB : IDF) {
NodeAddr<BlockNode*> DBA = findBlock(DB);
PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
}
@@ -1657,7 +1657,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
// Recursively process all children in the dominator tree.
MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
- for (auto I : *N) {
+ for (auto *I : *N) {
MachineBasicBlock *SB = I->getBlock();
NodeAddr<BlockNode*> SBA = findBlock(SB);
linkBlockRefs(DefM, SBA);
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index 2fd947086b4d..d8eac20d16b6 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -61,7 +61,7 @@ namespace rdf {
raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
OS << '{';
- for (auto &I : P.Obj) {
+ for (const auto &I : P.Obj) {
OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second);
@@ -767,7 +767,7 @@ void Liveness::computeLiveIns() {
}
for (auto I : IDF)
- for (auto S : I.second)
+ for (auto *S : I.second)
IIDF[S].insert(I.first);
computePhiInfo();
@@ -926,7 +926,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs());
CopyLiveIns(B, LiveIn);
- for (auto SI : B->successors())
+ for (auto *SI : B->successors())
CopyLiveIns(SI, Live);
for (MachineInstr &MI : llvm::reverse(*B)) {
@@ -1003,7 +1003,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// Go up the dominator tree (depth-first).
MachineDomTreeNode *N = MDT.getNode(B);
- for (auto I : *N) {
+ for (auto *I : *N) {
RefMap L;
MachineBasicBlock *SB = I->getBlock();
traverse(SB, L);
@@ -1015,7 +1015,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
if (Trace) {
dbgs() << "\n-- " << printMBBReference(*B) << ": " << __func__
<< " after recursion into: {";
- for (auto I : *N)
+ for (auto *I : *N)
dbgs() << ' ' << I->getBlock()->getNumber();
dbgs() << " }\n";
dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
@@ -1155,7 +1155,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n';
}
- for (auto C : IIDF[B]) {
+ for (auto *C : IIDF[B]) {
RegisterAggr &LiveC = LiveMap[C];
for (const std::pair<const RegisterId, NodeRefSet> &S : LiveIn)
for (auto R : S.second)
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 69db8bad54f9..d9ced9191fae 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -635,7 +635,7 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited,
SmallPtrSet<MachineInstr*, 4> Uses;
getGlobalUses(MI, MO.getReg(), Uses);
- for (auto I : Uses) {
+ for (auto *I : Uses) {
if (Ignore.count(I) || ToRemove.count(I))
continue;
if (!isSafeToRemove(I, Visited, ToRemove, Ignore))
diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index 0c18814189eb..990dd84c829d 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -166,7 +166,7 @@ void RegAllocBase::allocatePhysRegs() {
void RegAllocBase::postOptimization() {
spiller().postOptimization();
- for (auto DeadInst : DeadRemats) {
+ for (auto *DeadInst : DeadRemats) {
LIS->RemoveMachineInstrFromMaps(*DeadInst);
DeadInst->eraseFromParent();
}
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 7defdf04aec8..91795f3d27fe 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -135,6 +135,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 72ceaa768803..9e4e26f1392e 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1478,7 +1478,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
- for (auto &LiveReg : MBB.liveouts())
+ for (const auto &LiveReg : MBB.liveouts())
setPhysRegState(LiveReg.PhysReg, regPreAssigned);
Coalesced.clear();
@@ -1580,8 +1580,7 @@ FunctionPass *llvm::createFastRegisterAllocator() {
return new RegAllocFast();
}
-FunctionPass *llvm::createFastRegisterAllocator(
- std::function<bool(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC)> Ftor, bool ClearVirtRegs) {
+FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor,
+ bool ClearVirtRegs) {
return new RegAllocFast(Ftor, ClearVirtRegs);
}
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 2efb98ae200d..4a54d7ebf8a9 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -180,16 +180,7 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
return new RAGreedy();
}
-namespace llvm {
-FunctionPass* createGreedyRegisterAllocator(
- std::function<bool(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC)> Ftor);
-
-}
-
-FunctionPass* llvm::createGreedyRegisterAllocator(
- std::function<bool(const TargetRegisterInfo &TRI,
- const TargetRegisterClass &RC)> Ftor) {
+FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
return new RAGreedy(Ftor);
}
@@ -202,8 +193,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addPreserved<MachineBlockFrequencyInfo>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
@@ -2530,7 +2519,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
initializeCSRCost();
@@ -2552,7 +2540,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(LIS->dump());
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
- SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
+ SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index 358e74541a54..316b12d0213b 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -25,7 +25,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
@@ -54,7 +53,6 @@ class MachineLoop;
class MachineLoopInfo;
class MachineOptimizationRemarkEmitter;
class MachineOptimizationRemarkMissed;
-class SlotIndex;
class SlotIndexes;
class TargetInstrInfo;
class VirtRegMap;
@@ -174,7 +172,6 @@ private:
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
LiveDebugVariables *DebugVars;
- AliasAnalysis *AA;
// state
std::unique_ptr<Spiller> SpillerInstance;
diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 8c262130fb70..b3d926eeb552 100644
--- a/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -783,7 +783,7 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
VRegSpiller.postOptimization();
/// Remove dead defs because of rematerialization.
- for (auto DeadInst : DeadRemats) {
+ for (auto *DeadInst : DeadRemats) {
LIS.RemoveMachineInstrFromMaps(*DeadInst);
DeadInst->eraseFromParent();
}
diff --git a/llvm/lib/CodeGen/RegAllocScore.cpp b/llvm/lib/CodeGen/RegAllocScore.cpp
index 32fa5e07dd16..17e3eeef664b 100644
--- a/llvm/lib/CodeGen/RegAllocScore.cpp
+++ b/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -74,8 +74,7 @@ double RegAllocScore::getScore() const {
RegAllocScore
llvm::calculateRegAllocScore(const MachineFunction &MF,
- const MachineBlockFrequencyInfo &MBFI,
- AAResults &AAResults) {
+ const MachineBlockFrequencyInfo &MBFI) {
return calculateRegAllocScore(
MF,
[&](const MachineBasicBlock &MBB) {
@@ -83,7 +82,7 @@ llvm::calculateRegAllocScore(const MachineFunction &MF,
},
[&](const MachineInstr &MI) {
return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable(
- MI, &AAResults);
+ MI);
});
}
diff --git a/llvm/lib/CodeGen/RegAllocScore.h b/llvm/lib/CodeGen/RegAllocScore.h
index 2bcd0b5895bf..b80adae29f23 100644
--- a/llvm/lib/CodeGen/RegAllocScore.h
+++ b/llvm/lib/CodeGen/RegAllocScore.h
@@ -19,7 +19,6 @@
namespace llvm {
-class AAResults;
class MachineBasicBlock;
class MachineBlockFrequencyInfo;
class MachineFunction;
@@ -62,8 +61,7 @@ public:
/// different policies, the better policy would have a smaller score.
/// The implementation is the overload below (which is also easily unittestable)
RegAllocScore calculateRegAllocScore(const MachineFunction &MF,
- const MachineBlockFrequencyInfo &MBFI,
- AAResults &AAResults);
+ const MachineBlockFrequencyInfo &MBFI);
/// Implementation of the above, which is also more easily unittestable.
RegAllocScore calculateRegAllocScore(
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 930d05324440..8a6f823c8a0c 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1148,7 +1148,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// we need to keep the copy of B = A at the end of Pred if we remove
// B = A from MBB.
bool ValB_Changed = false;
- for (auto VNI : IntB.valnos) {
+ for (auto *VNI : IntB.valnos) {
if (VNI->isUnused())
continue;
if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
@@ -1306,7 +1306,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
if (!TII->isAsCheapAsAMove(*DefMI))
return false;
- if (!TII->isTriviallyReMaterializable(*DefMI, AA))
+ if (!TII->isTriviallyReMaterializable(*DefMI))
return false;
if (!definesFullReg(*DefMI, SrcReg))
return false;
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index 62a459fca611..b14a36e4eeb4 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -581,7 +581,7 @@ void RegisterOperands::collect(const MachineInstr &MI,
void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
const LiveIntervals &LIS) {
SlotIndex SlotIdx = LIS.getInstructionIndex(MI);
- for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
+ for (auto *RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
Register Reg = RI->RegUnit;
const LiveRange *LR = getLiveRange(LIS, Reg);
if (LR != nullptr) {
@@ -602,7 +602,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
const MachineRegisterInfo &MRI,
SlotIndex Pos,
MachineInstr *AddFlagsMI) {
- for (auto I = Defs.begin(); I != Defs.end(); ) {
+ for (auto *I = Defs.begin(); I != Defs.end();) {
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
Pos.getDeadSlot());
// If the def is all that is live after the instruction, then in case
@@ -620,7 +620,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
++I;
}
}
- for (auto I = Uses.begin(); I != Uses.end(); ) {
+ for (auto *I = Uses.begin(); I != Uses.end();) {
LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
Pos.getBaseIndex());
LaneBitmask LaneMask = I->LaneMask & LiveBefore;
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index e7116ec3ea28..00a551ade213 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -340,7 +340,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
// analysis here, which would look at all uses of an argument inside
// the function being called.
auto B = CS.arg_begin(), E = CS.arg_end();
- for (auto A = B; A != E; ++A)
+ for (const auto *A = B; A != E; ++A)
if (A->get() == V)
if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
CS.doesNotAccessMemory()))) {
@@ -498,7 +498,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (ClColoring)
SSC.run();
- for (auto *I : SSC.getMarkers()) {
+ for (const auto *I : SSC.getMarkers()) {
auto *Op = dyn_cast<Instruction>(I->getOperand(1));
const_cast<IntrinsicInst *>(I)->eraseFromParent();
// Remove the operand bitcast, too, if it has no more uses left.
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 07dcc34fbf15..4fc9399c2b9e 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -530,9 +530,9 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
-static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
+static inline bool isGlobalMemoryObject(MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
+ (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
}
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
@@ -880,7 +880,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
// actual addresses).
// This is a barrier event that acts as a pivotal node in the DAG.
- if (isGlobalMemoryObject(AA, &MI)) {
+ if (isGlobalMemoryObject(&MI)) {
// Become the barrier chain.
if (BarrierChain)
@@ -917,7 +917,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
// If it's not a store or a variant load, we're done.
if (!MI.mayStore() &&
- !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
+ !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
continue;
      // Always add a dependency edge to BarrierChain if present.
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index d627519a34aa..011f55efce1d 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -433,7 +433,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
DebugPseudoINS.push_back(&*DIt);
DIt++;
}
- for (auto DI : DebugPseudoINS) {
+ for (auto *DI : DebugPseudoINS) {
DI->moveBefore(&*EndBlock->getFirstInsertionPt());
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2654c00929d8..edb0756e8c3b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1868,8 +1868,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
// If N is a commutative binary node, try to eliminate it if the commuted
// version is already present in the DAG.
- if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
- N->getNumValues() == 1) {
+ if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4159,6 +4158,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
return RMUL;
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
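The SimplifyDemandedBits call added here is sound for MUL because a product's low bits depend only on the operands' low bits; a quick standalone check of that identity in plain C++ (illustrative only, not SelectionDAG code):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      const uint32_t Mask = 0xFF; // pretend only the low 8 bits are demanded
      for (uint32_t X : {3u, 200u, 0x12345678u})
        for (uint32_t Y : {7u, 99u, 0xCAFEBABEu})
          // Masking the operands first never changes the demanded bits of X * Y.
          assert(((X * Y) & Mask) == (((X & Mask) * (Y & Mask)) & Mask));
      return 0;
    }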
@@ -5978,44 +5981,64 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
if (!TLI.isTypeLegal(VT))
return SDValue();
- // Look through an optional extension and find a 'not'.
- // TODO: Should we favor test+set even without the 'not' op?
- SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
- if (Not.getOpcode() == ISD::ANY_EXTEND)
- Not = Not.getOperand(0);
- if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+ // Look through an optional extension.
+ SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
+ if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
+ And0 = And0.getOperand(0);
+ if (!isOneConstant(And1) || !And0.hasOneUse())
return SDValue();
- // Look though an optional truncation. The source operand may not be the same
- // type as the original 'and', but that is ok because we are masking off
- // everything but the low bit.
- SDValue Srl = Not.getOperand(0);
- if (Srl.getOpcode() == ISD::TRUNCATE)
- Srl = Srl.getOperand(0);
+ SDValue Src = And0;
+
+ // Attempt to find a 'not' op.
+ // TODO: Should we favor test+set even without the 'not' op?
+ bool FoundNot = false;
+ if (isBitwiseNot(Src)) {
+ FoundNot = true;
+ Src = Src.getOperand(0);
+
+ // Look through an optional truncation. The source operand may not be the
+ // same type as the original 'and', but that is ok because we are masking
+ // off everything but the low bit.
+ if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
+ Src = Src.getOperand(0);
+ }
// Match a shift-right by constant.
- if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
- !isa<ConstantSDNode>(Srl.getOperand(1)))
+ if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
return SDValue();
// We might have looked through casts that make this transform invalid.
// TODO: If the source type is wider than the result type, do the mask and
// compare in the source type.
- const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
- unsigned VTBitWidth = VT.getSizeInBits();
- if (ShiftAmt.uge(VTBitWidth))
+ unsigned VTBitWidth = VT.getScalarSizeInBits();
+ SDValue ShiftAmt = Src.getOperand(1);
+ auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
+ if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth))
return SDValue();
- if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1)))
+ // Set source to shift source.
+ Src = Src.getOperand(0);
+
+ // Try again to find a 'not' op.
+ // TODO: Should we favor test+set even with two 'not' ops?
+ if (!FoundNot) {
+ if (!isBitwiseNot(Src))
+ return SDValue();
+ Src = Src.getOperand(0);
+ }
+
+ if (!TLI.hasBitTest(Src, ShiftAmt))
return SDValue();
// Turn this into a bit-test pattern using mask op + setcc:
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+ // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
SDLoc DL(And);
- SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+ SDValue X = DAG.getZExtOrTrunc(Src, DL, VT);
EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Mask = DAG.getConstant(
- APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+ APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
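The bit-test rewrite now matches the 'not' on either side of the shift; both forms reduce to the same mask-and-compare. A small self-contained check of the two identities (illustrative C++, not the combiner itself):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < (1u << 12); ++x)
    for (unsigned c = 0; c < 32; ++c) {
      uint32_t bitClear = (x & (1u << c)) == 0 ? 1u : 0u; // (and X, 1<<C) == 0
      assert(((~(x >> c)) & 1u) == bitClear); // and (not (srl X, C)), 1
      assert((((~x) >> c) & 1u) == bitClear); // and (srl (not X), C), 1
    }
  return 0;
}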
@@ -6229,7 +6252,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
APInt Constant = APInt::getZero(1);
- if (const ConstantSDNode *C = isConstOrConstSplat(N1)) {
+ if (const ConstantSDNode *C = isConstOrConstSplat(
+ N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
APInt SplatValue, SplatUndef;
@@ -6339,18 +6363,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
- // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
- if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
- (N0.getOpcode() == ISD::ANY_EXTEND &&
- N0.getOperand(0).getOpcode() == ISD::LOAD))) {
- if (SDValue Res = reduceLoadWidth(N)) {
- LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
- ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
- AddToWorklist(N);
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
- return SDValue(N, 0);
- }
- }
+ if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
+ if (SDValue Res = reduceLoadWidth(N))
+ return Res;
if (LegalTypes) {
// Attempt to propagate the AND back up to the leaves which, if they're
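The simplified guard keeps the core load-narrowing fold: masking a wider load with 255 reads the same byte that a zero-extending i8 load would. A standalone sketch with hypothetical memory contents, asserting only on a little-endian host since which byte is selected depends on endianness:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char mem[4] = {0xAB, 0xCD, 0xEF, 0x12}; // hypothetical memory
  uint32_t wide;
  std::memcpy(&wide, mem, sizeof(wide)); // the original i32 load
  uint32_t narrow = mem[0];              // a zero-extending i8 load
  uint32_t probe = 1;
  bool littleEndian = *reinterpret_cast<unsigned char *>(&probe) == 1;
  if (littleEndian)
    assert((wide & 0xFFu) == narrow); // (and (load x), 255) == (zextload x, i8)
  return 0;
}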
@@ -6856,20 +6871,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
}
/// OR combines for which the commuted variant will be tried as well.
-static SDValue visitORCommutative(
- SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
+ SDNode *N) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == ISD::AND) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
// TODO: Set AllowUndefs = true.
- if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0),
+ if (getBitwiseNotOperand(N01, N00,
/* AllowUndefs */ false) == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
- if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1),
+ if (getBitwiseNotOperand(N00, N01,
/* AllowUndefs */ false) == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
}
if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
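The renamed N00/N01 operands feed the same two folds; the underlying Boolean identity is easy to verify exhaustively on small values (illustrative C++):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t y = 0; y < 256; ++y) {
      assert(((x & ~y) | y) == (x | y)); // (or (and X, (xor Y, -1)), Y)
      assert(((~y & x) | y) == (x | y)); // (or (and (xor Y, -1), X), Y)
    }
  return 0;
}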
@@ -7915,7 +7933,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
- for (auto Store : Stores) {
+ for (auto *Store : Stores) {
// All the stores store different parts of the CombinedValue. A truncate is
// required to get the partial value.
SDValue Trunc = Store->getValue();
@@ -8488,28 +8506,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
}
- if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
- ConstantSDNode *XorC = isConstOrConstSplat(N1);
- ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
- unsigned BitWidth = VT.getScalarSizeInBits();
- if (XorC && ShiftC) {
- // Don't crash on an oversized shift. We can not guarantee that a bogus
- // shift has been simplified to undef.
- uint64_t ShiftAmt = ShiftC->getLimitedValue();
- if (ShiftAmt < BitWidth) {
- APInt Ones = APInt::getAllOnes(BitWidth);
- Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
- if (XorC->getAPIntValue() == Ones) {
- // If the xor constant is a shifted -1, do a 'not' before the shift:
- // xor (X << ShiftC), XorC --> (not X) << ShiftC
- // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
- SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
- return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
- }
- }
- }
- }
-
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
@@ -11817,6 +11813,9 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
EVT N00VT = N00.getValueType();
SDLoc DL(N);
+ // Propagate fast-math-flags.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
+
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// the same size as the compared operands. Try to optimize sext(setcc())
// if this is the case.
@@ -12358,6 +12357,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return V;
if (N0.getOpcode() == ISD::SETCC) {
+ // Propagate fast-math-flags.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
+
// Only do this before legalize for now.
if (!LegalOperations && VT.isVector() &&
N0.getValueType().getVectorElementType() == MVT::i1) {
@@ -12549,6 +12551,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
if (N0.getOpcode() == ISD::SETCC) {
+ // Propagate fast-math-flags.
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
+
// For vectors:
// aext(setcc) -> vsetcc
// aext(setcc) -> truncate(vsetcc)
@@ -13155,6 +13160,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // Try to narrow a truncate-of-sext_in_reg to the destination type:
+ // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
+ if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ N0.hasOneUse()) {
+ SDValue X = N0.getOperand(0);
+ SDValue ExtVal = N0.getOperand(1);
+ EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
+ if (ExtVT.bitsLT(VT)) {
+ SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
+ }
+ }
+
// If this is anyext(trunc), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
return SDValue();
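The new truncate-of-sign_ext_inreg narrowing is sound because only the low M bits matter on both sides of the identity. A small sketch with M = 8 inside i64/i32 (hypothetical sample values, not DAG code):

#include <cassert>
#include <cstdint>

// trunc (sign_ext_inreg X, i8) to i32  ==  sign_ext_inreg (trunc X to i32), i8
static uint32_t lhsForm(uint64_t x) {
  int64_t sext = int64_t(int8_t(x));    // sign_ext_inreg X, i8 (in 64 bits)
  return uint32_t(sext);                // trunc to i32
}
static uint32_t rhsForm(uint64_t x) {
  uint32_t tr = uint32_t(x);            // trunc X to i32
  return uint32_t(int32_t(int8_t(tr))); // sign_ext_inreg in 32 bits
}

int main() {
  uint64_t samples[] = {0, 1, 0x7F, 0x80, 0xFF, 0x1234567890ABCDEFull, ~0ull};
  for (uint64_t x : samples)
    assert(lhsForm(x) == rhsForm(x));
  return 0;
}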
@@ -19478,7 +19496,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return Shuf;
// Handle <1 x ???> vector insertion special cases.
- if (VT.getVectorNumElements() == 1) {
+ if (NumElts == 1) {
// insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
InVal.getOperand(0).getValueType() == VT &&
@@ -19506,80 +19524,77 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
- // Attempt to fold the insertion into a legal BUILD_VECTOR.
+ // Attempt to convert an insert_vector_elt chain into a legal build_vector.
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
- auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
- assert(Ops.size() == NumElts && "Unexpected vector size");
-
- // Insert the element
- if (Elt < Ops.size()) {
- // All the operands of BUILD_VECTOR must have the same type;
- // we enforce that here.
- EVT OpVT = Ops[0].getValueType();
- Ops[Elt] =
- OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+ // vXi1 vector - we don't need to recurse.
+ if (NumElts == 1)
+ return DAG.getBuildVector(VT, DL, {InVal});
+
+ // If we haven't already collected the element, insert into the op list.
+ EVT MaxEltVT = InVal.getValueType();
+ auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
+ unsigned Idx) {
+ if (!Ops[Idx]) {
+ Ops[Idx] = Elt;
+ if (VT.isInteger()) {
+ EVT EltVT = Elt.getValueType();
+ MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
+ }
}
+ };
- // Return the new vector
+ // Ensure all the operands have the same value type, fill any missing
+ // operands with UNDEF, and create the BUILD_VECTOR.
+ auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
+ assert(Ops.size() == NumElts && "Unexpected vector size");
+ for (SDValue &Op : Ops) {
+ if (Op)
+ Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
+ else
+ Op = DAG.getUNDEF(MaxEltVT);
+ }
return DAG.getBuildVector(VT, DL, Ops);
};
- // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
- // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
- // vector elements.
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 8> Ops(NumElts, SDValue());
+ Ops[Elt] = InVal;
- // Do not combine these two vectors if the output vector will not replace
- // the input vector.
- if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
- Ops.append(InVec->op_begin(), InVec->op_end());
- return UpdateBuildVector(Ops);
- }
+ // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
+ for (SDValue CurVec = InVec; CurVec;) {
+ // UNDEF - build new BUILD_VECTOR from already inserted operands.
+ if (CurVec.isUndef())
+ return CanonicalizeBuildVector(Ops);
- if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
- Ops.push_back(InVec.getOperand(0));
- Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
- return UpdateBuildVector(Ops);
- }
+ // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
+ if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
+ for (unsigned I = 0; I != NumElts; ++I)
+ AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
+ return CanonicalizeBuildVector(Ops);
+ }
- if (InVec.isUndef()) {
- Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
- return UpdateBuildVector(Ops);
- }
+ // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
+ if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
+ AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
+ return CanonicalizeBuildVector(Ops);
+ }
- // If we're inserting into the end of a vector as part of an sequence, see
- // if we can create a BUILD_VECTOR by following the sequence back up the
- // chain.
- if (Elt == (NumElts - 1)) {
- SmallVector<SDValue> ReverseInsertions;
- ReverseInsertions.push_back(InVal);
-
- EVT MaxEltVT = InVal.getValueType();
- SDValue CurVec = InVec;
- for (unsigned I = 1; I != NumElts; ++I) {
- if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
- break;
+ // INSERT_VECTOR_ELT - insert operand and continue up the chain.
+ if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
+ if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
+ if (CurIdx->getAPIntValue().ult(NumElts)) {
+ unsigned Idx = CurIdx->getZExtValue();
+ AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
- auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
- if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
- break;
- SDValue CurVal = CurVec.getOperand(1);
- ReverseInsertions.push_back(CurVal);
- if (VT.isInteger()) {
- EVT CurValVT = CurVal.getValueType();
- MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
- }
- CurVec = CurVec.getOperand(0);
- }
+ // Found entire BUILD_VECTOR.
+ if (all_of(Ops, [](SDValue Op) { return !!Op; }))
+ return CanonicalizeBuildVector(Ops);
- if (ReverseInsertions.size() == NumElts) {
- for (unsigned I = 0; I != NumElts; ++I) {
- SDValue Val = ReverseInsertions[(NumElts - 1) - I];
- Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val;
- Ops.push_back(Val);
- }
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ CurVec = CurVec->getOperand(0);
+ continue;
+ }
+
+ // Failed to find a match in the chain - bail.
+ break;
}
}
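The rewritten walk records, per lane, the newest insert it sees while climbing the chain, and finishes once every lane is covered or an UNDEF/BUILD_VECTOR/SCALAR_TO_VECTOR base is reached. A toy model of the per-lane bookkeeping (plain C++ with hypothetical lane/value pairs, not DAG code):

#include <array>
#include <cassert>
#include <optional>
#include <utility>
#include <vector>

int main() {
  // Insert chain, outermost (newest) first: lane 2 = 30 over (lane 0 = 99 over
  // (lane 0 = 10 over (lane 1 = 20 over undef))).
  std::vector<std::pair<unsigned, int>> chain = {{2, 30}, {0, 99}, {0, 10}, {1, 20}};
  std::array<std::optional<int>, 3> ops; // one slot per vector lane
  for (auto [lane, val] : chain)
    if (!ops[lane])
      ops[lane] = val; // only the newest insert per lane survives
  // The collapsed "build_vector" holds the newest value written to each lane.
  assert(ops[0] == 99 && ops[1] == 20 && ops[2] == 30);
  return 0;
}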
@@ -22643,6 +22658,56 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // If we're not performing a select/blend shuffle, see if we can convert the
+ // shuffle into an AND node, where all the out-of-lane elements are known zero.
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ bool IsInLaneMask = true;
+ ArrayRef<int> Mask = SVN->getMask();
+ SmallVector<int, 16> ClearMask(NumElts, -1);
+ APInt DemandedLHS = APInt::getNullValue(NumElts);
+ APInt DemandedRHS = APInt::getNullValue(NumElts);
+ for (int I = 0; I != (int)NumElts; ++I) {
+ int M = Mask[I];
+ if (M < 0)
+ continue;
+ ClearMask[I] = M == I ? I : (I + NumElts);
+ IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
+ if (M != I) {
+ APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
+ Demanded.setBit(M % NumElts);
+ }
+ }
+ // TODO: Should we try to mask with N1 as well?
+ if (!IsInLaneMask &&
+ (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) &&
+ (DemandedLHS.isNullValue() ||
+ DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
+ (DemandedRHS.isNullValue() ||
+ DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
+ SDLoc DL(N);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+ EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
+ SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
+ SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
+ SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
+ for (int I = 0; I != (int)NumElts; ++I)
+ if (0 <= Mask[I])
+ AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
+
+ // See if a clear mask is legal instead of going via
+ // XformToShuffleWithZero which loses UNDEF mask elements.
+ if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
+ DAG.getConstant(0, DL, IntVT), ClearMask));
+
+ if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
+ DAG.getBuildVector(IntVT, DL, AndMask)));
+ }
+ }
+
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
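The shuffle-to-AND conversion rests on a per-lane identity: if every out-of-place lane pulls a value proven to be zero, the shuffle behaves like an AND with a per-lane all-ones/zero mask. A standalone sketch with made-up lane values:

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint32_t, 4> n0 = {0x11111111u, 0x22222222u, 0x33333333u, 0x44444444u};
  std::array<uint32_t, 4> zero = {0, 0, 0, 0}; // second operand, lanes proven zero
  // Mask: a lane either stays in place (I) or pulls a zero lane (I + 4).
  std::array<int, 4> mask = {0, 5, 2, 7};
  std::array<uint32_t, 4> shuffled{}, anded{};
  for (int i = 0; i < 4; ++i) {
    shuffled[i] = mask[i] < 4 ? n0[mask[i]] : zero[mask[i] - 4];
    uint32_t andMask = (mask[i] == i) ? 0xFFFFFFFFu : 0u; // all-ones or zero
    anded[i] = n0[i] & andMask;
  }
  assert(shuffled == anded);
  return 0;
}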
@@ -23385,10 +23450,14 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
int Index0, Index1;
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ // Extract element from splat_vector should be free.
+ // TODO: use DAG.isSplatValue instead?
+ bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
+ N1.getOpcode() == ISD::SPLAT_VECTOR;
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
- !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
@@ -23410,6 +23479,8 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, ScalarBO);
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
return DAG.getBuildVector(VT, DL, Ops);
}
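The splat-aware path uses the usual scalarization identity: applying a binary op lane-wise to two splats equals splatting the scalar result. A minimal check (plain C++, with addition chosen as the example op):

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 7, y = 13;
  std::array<uint32_t, 4> splatX, splatY, lanewise, splatOfSum;
  splatX.fill(x);
  splatY.fill(y);
  for (int i = 0; i < 4; ++i)
    lanewise[i] = splatX[i] + splatY[i]; // bo (splat X), (splat Y)
  splatOfSum.fill(x + y);                // splat (bo X, Y)
  assert(lanewise == splatOfSum);
  return 0;
}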
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 8bdc9410d131..56d35dfe8701 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1404,17 +1404,21 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
}
SDValue NewLoad;
+ Align ElementAlignment =
+ std::min(cast<StoreSDNode>(Ch)->getAlign(),
+ DAG.getDataLayout().getPrefTypeAlign(
+ Op.getValueType().getTypeForEVT(*DAG.getContext())));
if (Op.getValueType().isVector()) {
StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT,
Op.getValueType(), Idx);
- NewLoad =
- DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo());
+ NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+ MachinePointerInfo(), ElementAlignment);
} else {
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
- MachinePointerInfo(),
- VecVT.getVectorElementType());
+ MachinePointerInfo(), VecVT.getVectorElementType(),
+ ElementAlignment);
}
// Replace the chain going out of the store, by the one out of the load.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6c136bdfc652..b2df67f45c72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2918,6 +2918,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::STACKMAP:
Res = SoftPromoteHalfOp_STACKMAP(N, OpNo);
break;
+ case ISD::PATCHPOINT:
+ Res = SoftPromoteHalfOp_PATCHPOINT(N, OpNo);
+ break;
}
if (!Res.getNode())
@@ -3059,3 +3062,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) {
return SDValue(); // Signal that we replaced the node ourselves.
}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_PATCHPOINT(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo >= 7);
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Op = N->getOperand(OpNo);
+ NewOps[OpNo] = GetSoftPromotedHalf(Op);
+ SDValue NewNode =
+ DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we replaced the node ourselves.
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 343722a97c3c..228d4a43ccde 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1727,6 +1727,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STACKMAP:
Res = PromoteIntOp_STACKMAP(N, OpNo);
break;
+ case ISD::PATCHPOINT:
+ Res = PromoteIntOp_PATCHPOINT(N, OpNo);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = PromoteIntOp_VP_STRIDED(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2341,6 +2348,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
+ assert(OpNo >= 7);
+ SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end());
+ SDValue Operand = N->getOperand(OpNo);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType());
+ NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
+ assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) ||
+ (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4));
+
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -2886,11 +2912,15 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
+ Hi = DAG.computeKnownBits(HiOps[2]).isZero()
+ ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2))
+ : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
+ Hi = DAG.computeKnownBits(HiOps[2]).isZero()
+ ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2))
+ : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
}
return;
}
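Relaxing ADDCARRY/SUBCARRY to UADDO/USUBO is valid when the incoming carry/borrow is known to be zero, because the carry input then contributes nothing to the high half. A sketch of the high-half computation under that assumption (illustrative helper functions, not the actual DAG nodes):

#include <cassert>
#include <cstdint>

// Model of addcarry: sum plus carry-in, reporting the outgoing carry.
static uint64_t addcarry(uint64_t a, uint64_t b, uint64_t carryIn, uint64_t &carryOut) {
  uint64_t sum = a + b;
  carryOut = (sum < a) || (sum + carryIn < sum);
  return sum + carryIn;
}
// Model of uaddo: plain add, reporting unsigned overflow.
static uint64_t uaddo(uint64_t a, uint64_t b, uint64_t &carryOut) {
  uint64_t sum = a + b;
  carryOut = sum < a;
  return sum;
}

int main() {
  uint64_t hiA = 0x0123456789ABCDEFull, hiB = 0xFEDCBA9876543210ull;
  uint64_t c1, c2;
  // With a known-zero carry-in, both expansions of the high half agree.
  assert(addcarry(hiA, hiB, /*carryIn=*/0, c1) == uaddo(hiA, hiB, c2));
  assert(c1 == c2);
  return 0;
}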
@@ -4693,6 +4723,13 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STACKMAP:
Res = ExpandIntOp_STACKMAP(N, OpNo);
break;
+ case ISD::PATCHPOINT:
+ Res = ExpandIntOp_PATCHPOINT(N, OpNo);
+ break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Res = ExpandIntOp_VP_STRIDED(N, OpNo);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -5108,6 +5145,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
return Swap.getValue(1);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
+ assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) ||
+ (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4));
+
+ SDValue Hi; // The upper half is discarded.
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
+ GetExpandedInteger(NewOps[OpNo], NewOps[OpNo], Hi);
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
SDLoc dl(N);
@@ -5253,21 +5301,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
unsigned NumElems = N->getNumOperands();
EVT NOutVTElem = NOutVT.getVectorElementType();
-
+ TargetLoweringBase::BooleanContent NOutBoolType = TLI.getBooleanContents(NOutVT);
+ unsigned NOutExtOpc = TargetLowering::getExtendForContent(NOutBoolType);
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
Ops.reserve(NumElems);
for (unsigned i = 0; i != NumElems; ++i) {
- SDValue Op;
+ SDValue Op = N->getOperand(i);
+ EVT OpVT = Op.getValueType();
// BUILD_VECTOR integer operand types are allowed to be larger than the
// result's element type. This may still be true after the promotion. For
// example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to
// (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>.
- if (N->getOperand(i).getValueType().bitsLT(NOutVTElem))
- Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
- else
- Op = N->getOperand(i);
+ if (OpVT.bitsLT(NOutVTElem)) {
+ unsigned ExtOpc = ISD::ANY_EXTEND;
+ // Attempt to extend constant bool vectors to match target's BooleanContent.
+ // While not necessary, this improves chances of the constant correctly
+ // folding with compare results (e.g. for NOT patterns).
+ if (OpVT == MVT::i1 && Op.getOpcode() == ISD::Constant)
+ ExtOpc = NOutExtOpc;
+ Op = DAG.getNode(ExtOpc, dl, NOutVTElem, Op);
+ }
Ops.push_back(Op);
}
@@ -5524,30 +5579,67 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) {
assert(OpNo > 1);
-
SDValue Op = N->getOperand(OpNo);
- SDLoc DL = SDLoc(N);
+
+ // FIXME: Non-constant operands are not yet handled:
+ // - https://github.com/llvm/llvm-project/issues/26431
+ // - https://github.com/llvm/llvm-project/issues/55957
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op);
+ if (!CN)
+ return SDValue();
+
+ // Copy operands before the one being expanded.
SmallVector<SDValue> NewOps;
+ for (unsigned I = 0; I < OpNo; I++)
+ NewOps.push_back(N->getOperand(I));
+
+ EVT Ty = Op.getValueType();
+ SDLoc DL = SDLoc(N);
+ if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
+ NewOps.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
+ } else {
+ // FIXME: https://github.com/llvm/llvm-project/issues/55609
+ return SDValue();
+ }
+
+ // Copy remaining operands.
+ for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++)
+ NewOps.push_back(N->getOperand(I));
+
+ SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum));
+
+ return SDValue(); // Signal that we have replaced the node already.
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) {
+ assert(OpNo >= 7);
+ SDValue Op = N->getOperand(OpNo);
+
+ // FIXME: Non-constant operands are not yet handled:
+ // - https://github.com/llvm/llvm-project/issues/26431
+ // - https://github.com/llvm/llvm-project/issues/55957
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op);
+ if (!CN)
+ return SDValue();
// Copy operands before the one being expanded.
+ SmallVector<SDValue> NewOps;
for (unsigned I = 0; I < OpNo; I++)
NewOps.push_back(N->getOperand(I));
- if (Op->getOpcode() == ISD::Constant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op);
- EVT Ty = Op.getValueType();
- if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
- NewOps.push_back(
- DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
- NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
- } else {
- // FIXME: https://github.com/llvm/llvm-project/issues/55609
- return SDValue();
- }
+ EVT Ty = Op.getValueType();
+ SDLoc DL = SDLoc(N);
+ if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) {
+ NewOps.push_back(
+ DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty));
} else {
- // FIXME: Non-constant operands are not yet handled:
- // - https://github.com/llvm/llvm-project/issues/26431
- // - https://github.com/llvm/llvm-project/issues/55957
+ // FIXME: https://github.com/llvm/llvm-project/issues/55609
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2807b7f5ae68..6696b79cf885 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -403,6 +403,8 @@ private:
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -495,6 +497,8 @@ private:
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, const SDLoc &dl);
@@ -744,6 +748,7 @@ private:
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 842ffa2aa23e..f5a1eae1e7fe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -737,6 +737,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::SELECT:
Results.push_back(ExpandSELECT(Node));
return;
+ case ISD::SELECT_CC: {
+ if (Node->getValueType(0).isScalableVector()) {
+ EVT CondVT = TLI.getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+ SDValue SetCC =
+ DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(4));
+ Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
+ Node->getOperand(2),
+ Node->getOperand(3)));
+ return;
+ }
+ break;
+ }
case ISD::FP_TO_UINT:
ExpandFP_TO_UINT(Node, Results);
return;
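The scalable-vector SELECT_CC expansion uses the standard decomposition into a compare followed by a select. A scalar sketch of the equivalence (SETLT chosen as an example condition code):

#include <cassert>

// select_cc (a, b, t, f, setlt)  ==  select (setcc (a, b, setlt), t, f)
static int select_cc_lt(int a, int b, int t, int f) { return a < b ? t : f; }
static int select_via_setcc(int a, int b, int t, int f) {
  bool cond = (a < b); // setcc a, b, setlt
  return cond ? t : f; // select cond, t, f
}

int main() {
  int vals[] = {-3, 0, 1, 7};
  for (int a : vals)
    for (int b : vals)
      assert(select_cc_lt(a, b, 10, 20) == select_via_setcc(a, b, 10, 20));
  return 0;
}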
@@ -833,6 +847,16 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ // Expand the FP_TO_*INT_SAT if it is scalable to prevent it from unrolling below.
+ if (Node->getValueType(0).isScalableVector()) {
+ if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ }
+ break;
case ISD::SMULFIX:
case ISD::UMULFIX:
if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 78fc407e9573..3ac2a7bddc5a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -793,7 +793,7 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Emit any debug values associated with the node.
if (N->getHasDebugValue()) {
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
- for (auto DV : DAG->GetDbgValues(N)) {
+ for (auto *DV : DAG->GetDbgValues(N)) {
if (!DV->isEmitted())
if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
BB->insert(InsertPos, DbgMI);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 2a10157b404e..5166db033c62 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -749,7 +749,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
// source order number as N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
- for (auto DV : DAG->GetDbgValues(N)) {
+ for (auto *DV : DAG->GetDbgValues(N)) {
if (DV->isEmitted())
continue;
unsigned DVOrder = DV->getOrder();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c8d0f5faf647..441437351852 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
@@ -602,7 +603,7 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDValue> Ops) {
- for (auto& Op : Ops) {
+ for (const auto &Op : Ops) {
ID.AddPointer(Op.getNode());
ID.AddInteger(Op.getResNo());
}
@@ -611,7 +612,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDUse> Ops) {
- for (auto& Op : Ops) {
+ for (const auto &Op : Ops) {
ID.AddPointer(Op.getNode());
ID.AddInteger(Op.getResNo());
}
@@ -2711,16 +2712,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
SubDemandedElts &= ScaledDemandedElts;
if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
return false;
-
- // Here we can't do "MatchAnyBits" operation merge for undef bits.
- // Because some operation only use part value of the source.
- // Take llvm.fshl.* for example:
- // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32
- // t2: v2i64 = bitcast t1
- // t5: v2i64 = fshl t3, t4, t2
- // We can not convert t2 to {i64 undef, i64 undef}
- UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts,
- /*MatchAllBits=*/true);
+ // TODO: Add support for merging sub undef elements.
+ if (!SubUndefElts.isZero())
+ return false;
}
return true;
}
@@ -2947,6 +2941,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
+ case ISD::MERGE_VALUES:
+ return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts,
+ Depth + 1);
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
Known.Zero.setAllBits(); Known.One.setAllBits();
@@ -3219,12 +3216,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::mulhs(Known, Known2);
break;
}
- case ISD::UDIV: {
- Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::udiv(Known, Known2);
- break;
- }
case ISD::AVGCEILU: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -3339,6 +3330,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero |= Known2.Zero;
}
break;
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: {
+ assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
+
+ // Collect lo/hi source values and concatenate.
+ // TODO: Would a KnownBits::concatBits helper be useful?
+ unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits();
+ unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits();
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = Known.anyext(LoBits + HiBits);
+ Known.insertBits(Known2, LoBits);
+
+ // Collect shift amount.
+ Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+ if (Opcode == ISD::SHL_PARTS)
+ Known = KnownBits::shl(Known, Known2);
+ else if (Opcode == ISD::SRA_PARTS)
+ Known = KnownBits::ashr(Known, Known2);
+ else // if (Opcode == ISD::SRL_PARTS)
+ Known = KnownBits::lshr(Known, Known2);
+
+ // TODO: Minimum shift low/high bits are known zero.
+
+ if (Op.getResNo() == 0)
+ Known = Known.extractBits(LoBits, 0);
+ else
+ Known = Known.extractBits(HiBits, LoBits);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
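The new *_PARTS handling computes known bits by concatenating the two input halves, shifting the wide value, and extracting the requested half. The same concat/shift/extract identity on concrete values (illustrative only; the shift amount is assumed to be below the part width so the cross-check formula applies):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t lo = 0xDEADBEEFu, hi = 0x12345678u;
  unsigned amt = 13;
  uint64_t concat = (uint64_t(hi) << 32) | lo; // insertBits(hi, 32) over lo
  uint64_t shifted = concat << amt;            // SHL_PARTS on the wide value
  uint32_t resLo = uint32_t(shifted);          // extractBits(32, 0)
  uint32_t resHi = uint32_t(shifted >> 32);    // extractBits(32, 32)
  // Cross-check against the usual parts formulas for amt < 32.
  assert(resLo == (lo << amt));
  assert(resHi == ((hi << amt) | (lo >> (32 - amt))));
  return 0;
}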
@@ -3570,6 +3593,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
break;
}
+ case ISD::UDIV: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::udiv(Known, Known2);
+ break;
+ }
case ISD::SREM: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -3925,7 +3954,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::AssertZext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
return VTBits-Tmp;
-
+ case ISD::MERGE_VALUES:
+ return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts,
+ Depth + 1);
case ISD::BUILD_VECTOR:
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
@@ -6105,8 +6136,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(N1.getValueType().isVector() == VT.isVector() &&
"FP_TO_*INT_SAT type should be vector iff the operand type is "
"vector!");
- assert((!VT.isVector() || VT.getVectorNumElements() ==
- N1.getValueType().getVectorNumElements()) &&
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element counts must match in FP_TO_*INT_SAT");
assert(!cast<VTSDNode>(N2)->getVT().isVector() &&
"Type to saturate to must be a scalar.");
@@ -6719,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo,
- const AAMDNodes &AAInfo) {
+ const AAMDNodes &AAInfo, AAResults *AA) {
// Turn a memcpy of undef to nop.
// FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
@@ -6782,6 +6813,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
AAMDNodes NewAAInfo = AAInfo;
NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
+ const Value *SrcVal = SrcPtrInfo.V.dyn_cast<const Value *>();
+ bool isConstant =
+ AA && SrcVal &&
+ AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo));
+
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
SmallVector<SDValue, 16> OutLoadChains;
@@ -6843,6 +6879,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+ if (isConstant)
+ SrcMMOFlags |= MachineMemOperand::MOInvariant;
Value = DAG.getExtLoad(
ISD::EXTLOAD, dl, NVT, Chain,
@@ -7131,7 +7169,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo,
- const AAMDNodes &AAInfo) {
+ const AAMDNodes &AAInfo, AAResults *AA) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -7142,7 +7180,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Result = getMemcpyLoadsAndStores(
*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
- isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo);
+ isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA);
if (Result.getNode())
return Result;
}
@@ -7161,9 +7199,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// use a (potentially long) sequence of loads and stores.
if (AlwaysInline) {
assert(ConstantSize && "AlwaysInline requires a constant size!");
- return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(), Alignment,
- isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo);
+ return getMemcpyLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA);
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
@@ -7245,7 +7283,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo,
- const AAMDNodes &AAInfo) {
+ const AAMDNodes &AAInfo, AAResults *AA) {
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -8904,7 +8942,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
#ifndef NDEBUG
- for (auto &Op : Ops)
+ for (const auto &Op : Ops)
assert(Op.getOpcode() != ISD::DELETED_NODE &&
"Operand is DELETED_NODE!");
#endif
@@ -8928,6 +8966,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"True and False arms of SelectCC must have same type!");
assert(Ops[2].getValueType() == VT &&
"select_cc node must be of same type as true and false value!");
+ assert((!Ops[0].getValueType().isVector() ||
+ Ops[0].getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount()) &&
+ "Expected select_cc with vector result to have the same sized "
+ "comparison type!");
break;
case ISD::BR_CC:
assert(NumOps == 5 && "BR_CC takes 5 operands!");
@@ -9018,12 +9061,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags);
#ifndef NDEBUG
- for (auto &Op : Ops)
+ for (const auto &Op : Ops)
assert(Op.getOpcode() != ISD::DELETED_NODE &&
"Operand is DELETED_NODE!");
#endif
switch (Opcode) {
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO: {
+ assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
+ "Invalid add/sub overflow op!");
+ assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() &&
+ Ops[0].getValueType() == Ops[1].getValueType() &&
+ Ops[0].getValueType() == VTList.VTs[0] &&
+ "Binary operator types must match!");
+ SDValue N1 = Ops[0], N2 = Ops[1];
+ canonicalizeCommutativeBinop(Opcode, N1, N2);
+
+ // (X +- 0) -> X with zero-overflow.
+ ConstantSDNode *N2CV = isConstOrConstSplat(N2, /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true);
+ if (N2CV && N2CV->isZero()) {
+ SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
+ }
+ break;
+ }
case ISD::STRICT_FP_EXTEND:
assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
"Invalid STRICT_FP_EXTEND!");
@@ -9914,7 +9979,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
return;
SmallVector<SDDbgValue *, 2> ClonedDVs;
- for (auto DV : GetDbgValues(&N)) {
+ for (auto *DV : GetDbgValues(&N)) {
if (DV->isInvalidated())
continue;
switch (N.getOpcode()) {
@@ -10268,7 +10333,7 @@ bool SelectionDAG::calculateDivergence(SDNode *N) {
}
if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA))
return true;
- for (auto &Op : N->ops()) {
+ for (const auto &Op : N->ops()) {
if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent())
return true;
}
@@ -10298,7 +10363,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
}
for (size_t I = 0; I != Order.size(); ++I) {
SDNode *N = Order[I];
- for (auto U : N->uses()) {
+ for (auto *U : N->uses()) {
unsigned &UnsortedOps = Degree[U];
if (0 == --UnsortedOps)
Order.push_back(U);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fe3c38ec590d..35650b9bd00e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1789,7 +1789,7 @@ static void findWasmUnwindDestinations(
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
break;
- } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations. We don't
// continue to the unwind destination of the catchswitch for wasm.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
@@ -1844,7 +1844,7 @@ static void findUnwindDestinations(
UnwindDests.back().first->setIsEHScopeEntry();
UnwindDests.back().first->setIsEHFuncletEntry();
break;
- } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
@@ -2990,14 +2990,20 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
+ SmallPtrSet<BasicBlock *, 8> Dests;
+ Dests.insert(I.getDefaultDest());
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
- addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ BasicBlock *Dest = I.getIndirectDest(i);
+ MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
Target->setIsInlineAsmBrIndirectTarget();
+ Target->setHasAddressTaken();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
}
CallBrMBB->normalizeSuccProbs();
@@ -4075,6 +4081,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
return;
bool isVolatile = I.isVolatile();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
SDValue Root;
bool ConstantMemory = false;
@@ -4091,6 +4099,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
+ MMOFlags |= MachineMemOperand::MOInvariant;
+
+ // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
+ // but the previous usage implied it did. Probably should check
+ // isDereferenceableAndAlignedPointer.
+ MMOFlags |= MachineMemOperand::MODereferenceable;
} else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
@@ -4110,9 +4124,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
- MachineMemOperand::Flags MMOFlags
- = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
-
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// Serializing loads here may result in excessive register pressure, and
@@ -5766,7 +5777,7 @@ static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
->getCalledFunction()
->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
"expected call_preallocated_setup Value");
- for (auto *U : PreallocatedSetup->users()) {
+ for (const auto *U : PreallocatedSetup->users()) {
auto *UseCall = cast<CallBase>(U);
const Function *Fn = UseCall->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
@@ -5859,11 +5870,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
- /* AlwaysInline */ false, isTC,
- MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)),
- I.getAAMetadata());
+ SDValue MC = DAG.getMemcpy(
+ Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+ /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5881,11 +5891,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
- /* AlwaysInline */ true, isTC,
- MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)),
- I.getAAMetadata());
+ SDValue MC = DAG.getMemcpy(
+ getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
+ /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5940,7 +5949,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
- I.getAAMetadata());
+ I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MM);
return;
}
@@ -8855,7 +8864,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
break;
- case InlineAsm::isInput: {
+ case InlineAsm::isInput:
+ case InlineAsm::isLabel: {
SDValue InOperandVal = OpInfo.CallOperand;
if (OpInfo.isMatchingInputConstraint()) {
@@ -9295,19 +9305,18 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
- for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) {
- SDValue OpVal = Builder.getValue(Call.getArgOperand(i));
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
- Ops.push_back(
- Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
- Ops.push_back(
- Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
- } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
- const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
- Ops.push_back(Builder.DAG.getTargetFrameIndex(
- FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
- } else
- Ops.push_back(OpVal);
+ SelectionDAG &DAG = Builder.DAG;
+ for (unsigned I = StartIdx; I < Call.arg_size(); I++) {
+ SDValue Op = Builder.getValue(Call.getArgOperand(I));
+
+ // Things on the stack are pointer-typed, meaning that they are already
+ // legal and can be emitted directly to target nodes.
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType()));
+ } else {
+ // Otherwise emit a target independent node to be legalised.
+ Ops.push_back(Builder.getValue(Call.getArgOperand(I)));
+ }
}
}
@@ -9359,20 +9368,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
Ops.push_back(ShadConst);
// Add the live variables.
- for (unsigned I = 2; I < CI.arg_size(); I++) {
- SDValue Op = getValue(CI.getArgOperand(I));
-
- // Things on the stack are pointer-typed, meaning that they are already
- // legal and can be emitted directly to target nodes.
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- Ops.push_back(DAG.getTargetFrameIndex(
- FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout())));
- } else {
- // Otherwise emit a target independent node to be legalised.
- Ops.push_back(getValue(CI.getArgOperand(I)));
- }
- }
+ addStackMapLiveVars(CI, 2, DL, Ops, *this);
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -9449,6 +9445,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
+ // Push the chain.
+ Ops.push_back(*(Call->op_begin()));
+
+ // Optionally, push the glue (if any).
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end() - 1));
+
+ // Push the register mask info.
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end() - 2));
+ else
+ Ops.push_back(*(Call->op_end() - 1));
+
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
@@ -9477,27 +9486,13 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
Ops.push_back(getValue(CB.getArgOperand(i)));
- // Push the arguments from the call instruction up to the register mask.
+ // Push the arguments from the call instruction.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
- // Push the register mask info.
- if (HasGlue)
- Ops.push_back(*(Call->op_end()-2));
- else
- Ops.push_back(*(Call->op_end()-1));
-
- // Push the chain (this is originally the first operand of the call, but
- // becomes now the last or second to last operand).
- Ops.push_back(*(Call->op_begin()));
-
- // Push the glue flag (last operand).
- if (HasGlue)
- Ops.push_back(*(Call->op_end()-1));
-
SDVTList NodeTys;
if (IsAnyRegCC && HasDef) {
// Create the return types based on the intrinsic definition
@@ -9514,13 +9509,12 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// Replace the target specific call node with a PATCHPOINT node.
- MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
- dl, NodeTys, Ops);
+ SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops);
// Update the NodeMap.
if (HasDef) {
if (IsAnyRegCC)
- setValue(&CB, SDValue(MN, 0));
+ setValue(&CB, SDValue(PPV.getNode(), 0));
else
setValue(&CB, Result.first);
}
@@ -9531,10 +9525,10 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
// value.
if (IsAnyRegCC && HasDef) {
SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
- SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+ SDValue To[] = {PPV.getValue(1), PPV.getValue(2)};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
} else
- DAG.ReplaceAllUsesWith(Call, MN);
+ DAG.ReplaceAllUsesWith(Call, PPV.getNode());
DAG.DeleteNode(Call);
// Inform the Frame Information that we have a patchpoint in this function.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9df0b64c26c3..6ba01664e756 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -488,6 +488,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
case ISD::STACKMAP:
return "stackmap";
+ case ISD::PATCHPOINT:
+ return "patchpoint";
// Vector Predication
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 7f453f081982..d46a0a23cca3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2193,8 +2193,27 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
+ SDValue OpVal, SDLoc DL) {
+ SDNode *OpNode = OpVal.getNode();
+
+ // FrameIndex nodes should have been directly emitted to TargetFrameIndex
+ // nodes at DAG-construction time.
+ assert(OpNode->getOpcode() != ISD::FrameIndex);
+
+ if (OpNode->getOpcode() == ISD::Constant) {
+ Ops.push_back(
+ CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ Ops.push_back(
+ CurDAG->getTargetConstant(cast<ConstantSDNode>(OpNode)->getZExtValue(),
+ DL, OpVal.getValueType()));
+ } else {
+ Ops.push_back(OpVal);
+ }
+}
+
void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
- std::vector<SDValue> Ops;
+ SmallVector<SDValue, 32> Ops;
auto *It = N->op_begin();
SDLoc DL(N);
@@ -2213,24 +2232,8 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
Ops.push_back(Shad);
// Live variable operands.
- for (; It != N->op_end(); It++) {
- SDNode *OpNode = It->getNode();
- SDValue O;
-
- // FrameIndex nodes should have been directly emitted to TargetFrameIndex
- // nodes at DAG-construction time.
- assert(OpNode->getOpcode() != ISD::FrameIndex);
-
- if (OpNode->getOpcode() == ISD::Constant) {
- Ops.push_back(
- CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
- O = CurDAG->getTargetConstant(
- cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType());
- } else {
- O = *It;
- }
- Ops.push_back(O);
- }
+ for (; It != N->op_end(); It++)
+ pushStackMapLiveVariable(Ops, *It, DL);
Ops.push_back(Chain);
Ops.push_back(InFlag);
@@ -2239,6 +2242,57 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops);
}
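+// Select an ISD::PATCHPOINT node. Its operands arrive as (Chain, [Glue],
+// RegMask, ID, NumShadowBytes, Callee, NumArgs, CC, Args..., LiveVars...);
+// the target node expects RegMask, Chain and Glue at the end, so those three
+// are cached up front and re-appended after the live variables.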
+void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) {
+ SmallVector<SDValue, 32> Ops;
+ auto *It = N->op_begin();
+ SDLoc DL(N);
+
+ // Cache arguments that will be moved to the end in the target node.
+ SDValue Chain = *It++;
+ Optional<SDValue> Glue;
+ if (It->getValueType() == MVT::Glue)
+ Glue = *It++;
+ SDValue RegMask = *It++;
+
+ // <id> operand.
+ SDValue ID = *It++;
+ assert(ID.getValueType() == MVT::i64);
+ Ops.push_back(ID);
+
+ // <numShadowBytes> operand.
+ SDValue Shad = *It++;
+ assert(Shad.getValueType() == MVT::i32);
+ Ops.push_back(Shad);
+
+ // Add the callee.
+ Ops.push_back(*It++);
+
+ // Add <numArgs>.
+ SDValue NumArgs = *It++;
+ assert(NumArgs.getValueType() == MVT::i32);
+ Ops.push_back(NumArgs);
+
+ // Calling convention.
+ Ops.push_back(*It++);
+
+ // Push the args for the call.
+ for (uint64_t I = cast<ConstantSDNode>(NumArgs)->getZExtValue(); I != 0; I--)
+ Ops.push_back(*It++);
+
+ // Now push the live variables.
+ for (; It != N->op_end(); It++)
+ pushStackMapLiveVariable(Ops, *It, DL);
+
+ // Finally, the regmask, chain and (if present) glue are moved to the end.
+ Ops.push_back(RegMask);
+ Ops.push_back(Chain);
+ if (Glue.has_value())
+ Ops.push_back(Glue.value());
+
+ SDVTList NodeTys = N->getVTList();
+ CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops);
+}
+
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2796,6 +2850,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::STACKMAP:
Select_STACKMAP(NodeToMatch);
return;
+ case ISD::PATCHPOINT:
+ Select_PATCHPOINT(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 3061158eea30..c5c093ae228f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -169,8 +169,14 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// Spill location is known for gc relocates
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
- const auto &RelocationMap =
- Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()];
+ const Value *Statepoint = Relocate->getStatepoint();
+ assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) &&
+ "GetStatepoint must return one of two types");
+ if (isa<UndefValue>(Statepoint))
+ return None;
+
+ const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps
+ [cast<GCStatepointInst>(Statepoint)];
auto It = RelocationMap.find(Relocate);
if (It == RelocationMap.end())
@@ -193,7 +199,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
Optional<int> MergedResult = None;
- for (auto &IncomingValue : Phi->incoming_values()) {
+ for (const auto &IncomingValue : Phi->incoming_values()) {
Optional<int> SpillSlot =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
if (!SpillSlot)
@@ -569,9 +575,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// We cannot assign them to VRegs.
SmallSet<SDValue, 8> LPadPointers;
if (!UseRegistersForGCPointersInLandingPad)
- if (auto *StInvoke = dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) {
+ if (const auto *StInvoke =
+ dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) {
LandingPadInst *LPI = StInvoke->getLandingPadInst();
- for (auto *Relocate : SI.GCRelocates)
+ for (const auto *Relocate : SI.GCRelocates)
if (Relocate->getOperand(0) == LPI) {
LPadPointers.insert(Builder.getValue(Relocate->getBasePtr()));
LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr()));
@@ -739,7 +746,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
- for (auto *Reloc : SI.GCRelocates)
+ for (const auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
StatepointLowering.scheduleRelocCall(*Reloc);
#endif
@@ -1017,7 +1024,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
static std::pair<const GCResultInst*, const GCResultInst*>
getGCResultLocality(const GCStatepointInst &S) {
std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr);
- for (auto *U : S.users()) {
+ for (const auto *U : S.users()) {
auto *GRI = dyn_cast<GCResultInst>(U);
if (!GRI)
continue;
@@ -1195,9 +1202,13 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
// The result value of the gc_result is simply the result of the actual
// call. We've already emitted this, so just grab the value.
- const GCStatepointInst *SI = CI.getStatepoint();
+ const Value *SI = CI.getStatepoint();
+ assert((isa<GCStatepointInst>(SI) || isa<UndefValue>(SI)) &&
+ "GetStatepoint must return one of two types");
+ if (isa<UndefValue>(SI))
+ return;
- if (SI->getParent() == CI.getParent()) {
+ if (cast<GCStatepointInst>(SI)->getParent() == CI.getParent()) {
setValue(&CI, getValue(SI));
return;
}
@@ -1215,12 +1226,18 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
}
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
+ const Value *Statepoint = Relocate.getStatepoint();
#ifndef NDEBUG
// Consistency check
// We skip this check for relocates not in the same basic block as their
// statepoint. It would be too expensive to preserve validation info through
// different basic blocks.
- if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
+ assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) &&
+ "GetStatepoint must return one of two types");
+ if (isa<UndefValue>(Statepoint))
+ return;
+
+ if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent())
StatepointLowering.relocCallVisited(Relocate);
auto *Ty = Relocate.getType()->getScalarType();
@@ -1230,14 +1247,15 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
const Value *DerivedPtr = Relocate.getDerivedPtr();
auto &RelocationMap =
- FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()];
+ FuncInfo.StatepointRelocationMaps[cast<GCStatepointInst>(Statepoint)];
auto SlotIt = RelocationMap.find(&Relocate);
assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value");
const RecordType &Record = SlotIt->second;
// If relocation was done via virtual register..
if (Record.type == RecordType::SDValueNode) {
- assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() &&
+ assert(cast<GCStatepointInst>(Statepoint)->getParent() ==
+ Relocate.getParent() &&
"Nonlocal gc.relocate mapped via SDValue");
SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr));
assert(SDV.getNode() && "empty SDValue");
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 66389a57f780..cd4f0ae42bcd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1056,13 +1056,13 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO: We can probably do more work on calculating the known bits and
// simplifying the operations for scalable vectors, but for now we just
// bail out.
- if (Op.getValueType().isScalableVector())
+ EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
return false;
bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
unsigned NumElts = OriginalDemandedElts.getBitWidth();
- assert((!Op.getValueType().isVector() ||
- NumElts == Op.getValueType().getVectorNumElements()) &&
+ assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) &&
"Unexpected vector size");
APInt DemandedBits = OriginalDemandedBits;
@@ -1088,7 +1088,6 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Other users may use these bits.
- EVT VT = Op.getValueType();
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, Just compute the Known bits to
@@ -1468,6 +1467,33 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
+ // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
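+ // E.g. with C1 = 0x0f and C2 = 0xf0:
+ //   (X & 0x0f) | ((X | Y) & 0xf0) == (X & 0xff) | (Y & 0xf0)
+ // because (X | Y) & C2 == (X & C2) | (Y & C2), and C1 | C2 collects both
+ // masks applied to X.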
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ // Attempt to match all commutations - m_c_Or would've been useful!
+ for (int I = 0; I != 2; ++I) {
+ SDValue X = Op.getOperand(I).getOperand(0);
+ SDValue C1 = Op.getOperand(I).getOperand(1);
+ SDValue Alt = Op.getOperand(1 - I).getOperand(0);
+ SDValue C2 = Op.getOperand(1 - I).getOperand(1);
+ if (Alt.getOpcode() == ISD::OR) {
+ for (int J = 0; J != 2; ++J) {
+ if (X == Alt.getOperand(J)) {
+ SDValue Y = Alt.getOperand(1 - J);
+ if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
+ {C1, C2})) {
+ SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
+ SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
+ }
+ }
+ }
+ }
+ }
+ }
+
Known |= Known2;
break;
}
@@ -1500,7 +1526,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
- ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
+ ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
if (C) {
// If one side is a constant, and all of the set bits in the constant are
// also known set on the other side, turn this into an AND, as we know
@@ -1521,6 +1547,32 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
}
+
+ unsigned Op0Opcode = Op0.getOpcode();
+ if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
+ if (ConstantSDNode *ShiftC =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+ // Don't crash on an oversized shift. We cannot guarantee that a
+ // bogus shift has been simplified to undef.
+ if (ShiftC->getAPIntValue().ult(BitWidth)) {
+ uint64_t ShiftAmt = ShiftC->getZExtValue();
+ APInt Ones = APInt::getAllOnes(BitWidth);
+ Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
+ : Ones.lshr(ShiftAmt);
+ const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
+ if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
+ TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
+ // If the xor constant is a demanded mask, do a 'not' before the
+ // shift:
+ // xor (X << ShiftC), XorC --> (not X) << ShiftC
+ // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
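+ // The guard above ensures XorC matches the post-shift all-ones mask on
+ // every demanded bit, so on those bits the xor acts as a plain 'not' and
+ // commutes with the shift.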
+ SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
+ Op0.getOperand(1)));
+ }
+ }
+ }
+ }
}
// If we can't turn this into a 'not', try to shrink the constant.
@@ -1723,6 +1775,26 @@ bool TargetLowering::SimplifyDemandedBits(
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
+ } else {
+ // This is a variable shift, so we can't shift the demand mask by a known
+ // amount. But if we are not demanding high bits, then we are not
+ // demanding those bits from the pre-shifted operand either.
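+ // E.g. for this left shift, if only the low 16 bits of the result are
+ // demanded, no source bit above bit 15 can reach a demanded bit for any
+ // in-range shift amount.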
+ if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
+ APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
+ if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
+ Depth + 1)) {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
+ return true;
+ }
+ Known.resetAll();
+ }
}
// If we are only demanding sign bits then we can use the shift source
@@ -3292,6 +3364,12 @@ bool TargetLowering::SimplifyDemandedVectorElts(
TLO, Depth + 1))
return true;
+ // If every element pair has a zero/undef then just fold to zero.
+ // fold (and x, undef) -> 0 / (and x, 0) -> 0
+ // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
+ if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
// If either side has a zero element, then the result element is zero, even
// if the other is an UNDEF.
// TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
@@ -3301,7 +3379,6 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef &= ~KnownZero;
// Attempt to avoid multi-use ops if we don't need anything from them.
- // TODO - use KnownUndef to relax the demandedelts?
if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
@@ -5204,6 +5281,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// ConstraintOperands list.
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
+ unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
ConstraintOperands.emplace_back(std::move(CI));
@@ -5240,6 +5318,14 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case InlineAsm::isInput:
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
break;
+ case InlineAsm::isLabel:
+ OpInfo.CallOperandVal =
+ cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo);
+ OpInfo.ConstraintVT =
+ getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType())
+ .getSimpleVT();
+ ++LabelNo;
+ continue;
case InlineAsm::isClobber:
// Nothing to do.
break;
@@ -5852,22 +5938,22 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
+ UnsignedDivisionByConstantInfo magics =
+ UnsignedDivisionByConstantInfo::get(Divisor);
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
- if (magics.IsAdd != 0 && !Divisor[0]) {
+ if (magics.IsAdd && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
- magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
- assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+ magics =
+ UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(!magics.IsAdd && "Should use cheap fixup now");
}
- APInt Magic = magics.Magic;
-
unsigned SelNPQ;
- if (magics.IsAdd == 0 || Divisor.isOne()) {
+ if (!magics.IsAdd || Divisor.isOne()) {
assert(magics.ShiftAmount < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
PostShift = magics.ShiftAmount;
@@ -5878,7 +5964,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
}
PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
- MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
: APInt::getZero(EltBits),
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index ffac68a223bf..ee3a0164564e 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -179,21 +179,12 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End) {
- // FIXME: Is this really necessary? The only caller repairIntervalsForRange()
- // does the same thing.
- // Find anchor points, which are at the beginning/end of blocks or at
- // instructions that already have indexes.
- while (Begin != MBB->begin() && !hasIndex(*Begin))
- --Begin;
- while (End != MBB->end() && !hasIndex(*End))
- ++End;
-
bool includeStart = (Begin == MBB->begin());
SlotIndex startIdx;
if (includeStart)
startIdx = getMBBStartIdx(MBB);
else
- startIdx = getInstructionIndex(*Begin);
+ startIdx = getInstructionIndex(*--Begin);
SlotIndex endIdx;
if (End == MBB->end())
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 140a91ae342b..94149f56e703 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -347,13 +347,11 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA,
- LiveIntervals &LIS, VirtRegMap &VRM,
+SplitEditor::SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM,
MachineDominatorTree &MDT,
MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI)
- : SA(SA), AA(AA), LIS(LIS), VRM(VRM),
- MRI(VRM.getMachineFunction().getRegInfo()), MDT(MDT),
- TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()),
+ : SA(SA), LIS(LIS), VRM(VRM), MRI(VRM.getMachineFunction().getRegInfo()),
+ MDT(MDT), TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()),
TRI(*VRM.getMachineFunction().getSubtarget().getRegisterInfo()),
MBFI(MBFI), VRAI(VRAI), RegAssign(Allocator) {}
@@ -371,9 +369,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
LICalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
&LIS.getVNInfoAllocator());
- // We don't need an AliasAnalysis since we will only be performing
- // cheap-as-a-copy remats anyway.
- Edit->anyRematerializable(nullptr);
+ Edit->anyRematerializable();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1454,7 +1450,7 @@ void SplitEditor::deleteRematVictims() {
if (Dead.empty())
return;
- Edit->eliminateDeadDefs(Dead, None, &AA);
+ Edit->eliminateDeadDefs(Dead, None);
}
void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) {
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index 4400a797d38e..556b022b93fb 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -257,7 +257,6 @@ public:
///
class LLVM_LIBRARY_VISIBILITY SplitEditor {
SplitAnalysis &SA;
- AAResults &AA;
LiveIntervals &LIS;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
@@ -436,9 +435,9 @@ private:
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
- SplitEditor(SplitAnalysis &SA, AAResults &AA, LiveIntervals &LIS,
- VirtRegMap &VRM, MachineDominatorTree &MDT,
- MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI);
+ SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM,
+ MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI,
+ VirtRegAuxInfo &VRAI);
/// reset - Prepare for a new split.
void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index 6757d6ca4f88..ccaff862fa3f 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -365,7 +365,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
});
for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
- for (auto II = std::next(I); II != E; ++II) {
+ for (auto *II = std::next(I); II != E; ++II) {
if (I->DwarfRegNum != II->DwarfRegNum) {
// Skip all the now invalid entries.
I = --II;
diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
index 4408011c95c0..2282d53e8ffd 100644
--- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -267,7 +267,7 @@ void SwiftErrorValueTracking::preassignVRegs(
if (auto *CB = dyn_cast<CallBase>(&*It)) {
// A call-site with a swifterror argument is both use and def.
const Value *SwiftErrorAddr = nullptr;
- for (auto &Arg : CB->args()) {
+ for (const auto &Arg : CB->args()) {
if (!Arg->isSwiftError())
continue;
// Use of swifterror.
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index ba533a491b9c..18507b8fa84f 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -653,7 +653,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
// Disable tail duplication for this case for now, until the problem is
// fixed.
- for (auto SB : TailBB.successors()) {
+ for (auto *SB : TailBB.successors()) {
for (auto &I : *SB) {
if (!I.isPHI())
break;
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2a987ee3eedf..4116231c005f 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -916,7 +916,7 @@ void TargetInstrInfo::genAlternativeCodeSequence(
}
bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
- const MachineInstr &MI, AAResults *AA) const {
+ const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getMF();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -952,7 +952,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
return false;
// Avoid instructions which load from potentially varying memory.
- if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))
+ if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad())
return false;
// If any of the registers accessed are non-constant, conservatively assume
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 6a595a4c748b..a342a4dd1e25 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1145,7 +1145,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
/// specified register class are all legal.
bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
const TargetRegisterClass &RC) const {
- for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
+ for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
if (isTypeLegal(*I))
return true;
return false;
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index c44fd9f97383..17fe819fa900 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1450,9 +1450,9 @@ void
TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
TiedPairList &TiedPairs,
unsigned &Dist) {
- bool IsEarlyClobber = llvm::find_if(TiedPairs, [MI](auto const &TP) {
- return MI->getOperand(TP.second).isEarlyClobber();
- }) != TiedPairs.end();
+ bool IsEarlyClobber = llvm::any_of(TiedPairs, [MI](auto const &TP) {
+ return MI->getOperand(TP.second).isEarlyClobber();
+ });
bool RemovedKillFlag = false;
bool AllUsesCopied = true;
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 166a3c413f6a..8dc8d381ad16 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -446,7 +446,7 @@ void IRPromoter::ExtendSources() {
// Now, insert extending instructions between the sources and their users.
LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
- for (auto V : Sources) {
+ for (auto *V : Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
@@ -524,7 +524,7 @@ void IRPromoter::TruncateSinks() {
// Fix up any stores or returns that use the results of the promoted
// chain.
- for (auto I : Sinks) {
+ for (auto *I : Sinks) {
LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
// Handle calls separately as we need to iterate over arg operands.
@@ -570,7 +570,7 @@ void IRPromoter::Cleanup() {
LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
// Some zexts will now have become redundant, along with their trunc
// operands, so remove them
- for (auto V : Visited) {
+ for (auto *V : Visited) {
if (!isa<ZExtInst>(V))
continue;
diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index 8b5b585090f5..8225d4ea6996 100644
--- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -579,7 +579,7 @@ static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
/// pressure, then return 0.
int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
PressureDiff &PD = DAG->getPressureDiff(SU);
- for (auto &P : PD) {
+ for (const auto &P : PD) {
if (!P.isValid())
continue;
// The pressure differences are computed bottom-up, so the comparison for
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 298359dea9af..62b7f629f403 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -504,9 +504,14 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
&DIE);
return Flags;
}
+ if (*LowPc > *HighPc) {
+ reportWarning("low_pc greater than high_pc. Range will be discarded.\n",
+ File, &DIE);
+ return Flags;
+ }
// Replace the debug map range with a more accurate one.
- Ranges[*LowPc] = ObjFileAddressRange(*HighPc, MyInfo.AddrAdjust);
+ Ranges.insert({*LowPc, *HighPc}, MyInfo.AddrAdjust);
Unit.addFunctionRange(*LowPc, *HighPc, MyInfo.AddrAdjust);
return Flags;
}
@@ -1575,7 +1580,7 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(),
OrigDwarf.getDWARFObj().getRangesSection(),
OrigDwarf.isLittleEndian(), AddressSize);
- auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange;
+ Optional<std::pair<AddressRange, int64_t>> CurrRange;
DWARFUnit &OrigUnit = Unit.getOrigUnit();
auto OrigUnitDie = OrigUnit.getUnitDIE(false);
uint64_t OrigLowPc =
@@ -1598,12 +1603,11 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
if (!Entries.empty()) {
const DWARFDebugRangeList::RangeListEntry &First = Entries.front();
- if (CurrRange == InvalidRange ||
- First.StartAddress + OrigLowPc < CurrRange.start() ||
- First.StartAddress + OrigLowPc >= CurrRange.stop()) {
- CurrRange = FunctionRanges.find(First.StartAddress + OrigLowPc);
- if (CurrRange == InvalidRange ||
- CurrRange.start() > First.StartAddress + OrigLowPc) {
+ if (!CurrRange ||
+ !CurrRange->first.contains(First.StartAddress + OrigLowPc)) {
+ CurrRange = FunctionRanges.getRangeValueThatContains(
+ First.StartAddress + OrigLowPc);
+ if (!CurrRange) {
reportWarning("no mapping for range.", File);
continue;
}
@@ -1710,7 +1714,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
// in NewRows.
std::vector<DWARFDebugLine::Row> Seq;
const auto &FunctionRanges = Unit.getFunctionRanges();
- auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange;
+ Optional<std::pair<AddressRange, int64_t>> CurrRange;
// FIXME: This logic is meant to generate exactly the same output as
// Darwin's classic dsymutil. There is a nicer way to implement this
@@ -1729,19 +1733,14 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
// it is marked as end_sequence in the input (because in that
// case, the relocation offset is accurate and that entry won't
// serve as the start of another function).
- if (CurrRange == InvalidRange || Row.Address.Address < CurrRange.start() ||
- Row.Address.Address > CurrRange.stop() ||
- (Row.Address.Address == CurrRange.stop() && !Row.EndSequence)) {
+ if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) ||
+ (Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) {
// We just stepped out of a known range. Insert an end_sequence
// corresponding to the end of the range.
- uint64_t StopAddress = CurrRange != InvalidRange
- ? CurrRange.stop() + CurrRange.value()
- : -1ULL;
- CurrRange = FunctionRanges.find(Row.Address.Address);
- bool CurrRangeValid =
- CurrRange != InvalidRange && CurrRange.start() <= Row.Address.Address;
- if (!CurrRangeValid) {
- CurrRange = InvalidRange;
+ uint64_t StopAddress =
+ CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL;
+ CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address);
+ if (!CurrRange) {
if (StopAddress != -1ULL) {
// Try harder by looking in the Address ranges map.
// There are corner cases where this finds a
@@ -1749,14 +1748,9 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
// for now do as dsymutil.
// FIXME: Understand exactly what cases this addresses and
// potentially remove it along with the Ranges map.
- auto Range = Ranges.lower_bound(Row.Address.Address);
- if (Range != Ranges.begin() && Range != Ranges.end())
- --Range;
-
- if (Range != Ranges.end() && Range->first <= Row.Address.Address &&
- Range->second.HighPC >= Row.Address.Address) {
- StopAddress = Row.Address.Address + Range->second.Offset;
- }
+ if (Optional<std::pair<AddressRange, int64_t>> Range =
+ Ranges.getRangeValueThatContains(Row.Address.Address))
+ StopAddress = Row.Address.Address + (*Range).second;
}
}
if (StopAddress != -1ULL && !Seq.empty()) {
@@ -1772,7 +1766,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
insertLineSequence(Seq, NewRows);
}
- if (!CurrRangeValid)
+ if (!CurrRange)
continue;
}
@@ -1781,7 +1775,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
continue;
// Relocate row address and add it to the current sequence.
- Row.Address.Address += CurrRange.value();
+ Row.Address.Address += CurrRange->second;
Seq.emplace_back(Row);
if (Row.EndSequence)
@@ -1921,11 +1915,9 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
// the function entry point, thus we can't just lookup the address
// in the debug map. Use the AddressInfo's range map to see if the FDE
// describes something that we can relocate.
- auto Range = Ranges.upper_bound(Loc);
- if (Range != Ranges.begin())
- --Range;
- if (Range == Ranges.end() || Range->first > Loc ||
- Range->second.HighPC <= Loc) {
+ Optional<std::pair<AddressRange, int64_t>> Range =
+ Ranges.getRangeValueThatContains(Loc);
+ if (!Range) {
// The +4 is to account for the size of the InitialLength field itself.
InputOffset = EntryOffset + InitialLength + 4;
continue;
@@ -1953,7 +1945,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
// fields that will get reconstructed by emitFDE().
unsigned FDERemainingBytes = InitialLength - (4 + AddrSize);
TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize,
- Loc + Range->second.Offset,
+ Loc + Range->second,
FrameData.substr(InputOffset, FDERemainingBytes));
InputOffset += FDERemainingBytes;
}
diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
index e9e8be7fd008..1cb20c0bb948 100644
--- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -105,11 +105,7 @@ void CompileUnit::addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset) {
void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc,
int64_t PcOffset) {
- // Don't add empty ranges to the interval map. They are a problem because
- // the interval map expects half open intervals. This is safe because they
- // are empty anyway.
- if (FuncHighPc != FuncLowPc)
- Ranges.insert(FuncLowPc, FuncHighPc, PcOffset);
+ Ranges.insert({FuncLowPc, FuncHighPc}, PcOffset);
this->LowPc = std::min(LowPc, FuncLowPc + PcOffset);
this->HighPc = std::max(HighPc, FuncHighPc + PcOffset);
}
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 55ff6b14f945..a00e51fcf135 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -321,13 +321,14 @@ void DwarfStreamer::emitSwiftReflectionSection(
/// sized addresses describing the ranges.
void DwarfStreamer::emitRangesEntries(
int64_t UnitPcOffset, uint64_t OrigLowPc,
- const FunctionIntervals::const_iterator &FuncRange,
+ Optional<std::pair<AddressRange, int64_t>> FuncRange,
const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
unsigned AddressSize) {
MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
// Offset each range by the right amount.
- int64_t PcOffset = Entries.empty() ? 0 : FuncRange.value() + UnitPcOffset;
+ int64_t PcOffset =
+ (Entries.empty() || !FuncRange) ? 0 : FuncRange->second + UnitPcOffset;
for (const auto &Range : Entries) {
if (Range.isBaseAddressSelectionEntry(AddressSize)) {
warn("unsupported base address selection operation",
@@ -339,8 +340,7 @@ void DwarfStreamer::emitRangesEntries(
continue;
// All range entries should lie in the function range.
- if (!(Range.StartAddress + OrigLowPc >= FuncRange.start() &&
- Range.EndAddress + OrigLowPc <= FuncRange.stop()))
+ if (!FuncRange->first.contains(Range.StartAddress + OrigLowPc))
warn("inconsistent range data.", "emitting debug_ranges");
MS->emitIntValue(Range.StartAddress + PcOffset, AddressSize);
MS->emitIntValue(Range.EndAddress + PcOffset, AddressSize);
@@ -365,11 +365,13 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit,
// IntervalMap will have coalesced the non-linked ranges, but here
// we want to coalesce the linked addresses.
std::vector<std::pair<uint64_t, uint64_t>> Ranges;
- const auto &FunctionRanges = Unit.getFunctionRanges();
- for (auto Range = FunctionRanges.begin(), End = FunctionRanges.end();
- Range != End; ++Range)
- Ranges.push_back(std::make_pair(Range.start() + Range.value(),
- Range.stop() + Range.value()));
+ const RangesTy &FunctionRanges = Unit.getFunctionRanges();
+ for (size_t Idx = 0; Idx < FunctionRanges.size(); Idx++) {
+ std::pair<AddressRange, int64_t> CurRange = FunctionRanges[Idx];
+
+ Ranges.push_back(std::make_pair(CurRange.first.start() + CurRange.second,
+ CurRange.first.end() + CurRange.second));
+ }
// The object addresses where sorted, but again, the linked
// addresses might end up in a different order.
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index 34615a73e328..44e39c019e0c 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/Object/Decompressor.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
@@ -273,12 +274,16 @@ static Error createError(StringRef Name, Error E) {
static Error
handleCompressedSection(std::deque<SmallString<32>> &UncompressedSections,
- StringRef &Name, StringRef &Contents) {
- if (!Decompressor::isGnuStyle(Name))
+ SectionRef Sec, StringRef Name, StringRef &Contents) {
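+ // Only ELF sections carrying SHF_COMPRESSED are decompressed here; the
+ // legacy GNU ".zdebug"-style sections are no longer special-cased.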
+ auto *Obj = dyn_cast<ELFObjectFileBase>(Sec.getObject());
+ if (!Obj ||
+ !(static_cast<ELFSectionRef>(Sec).getFlags() & ELF::SHF_COMPRESSED))
return Error::success();
-
- Expected<Decompressor> Dec =
- Decompressor::create(Name, Contents, false /*IsLE*/, false /*Is64Bit*/);
+ bool IsLE = isa<object::ELF32LEObjectFile>(Obj) ||
+ isa<object::ELF64LEObjectFile>(Obj);
+ bool Is64 = isa<object::ELF64LEObjectFile>(Obj) ||
+ isa<object::ELF64BEObjectFile>(Obj);
+ Expected<Decompressor> Dec = Decompressor::create(Name, Contents, IsLE, Is64);
if (!Dec)
return createError(Name, Dec.takeError());
@@ -286,7 +291,6 @@ handleCompressedSection(std::deque<SmallString<32>> &UncompressedSections,
if (Error E = Dec->resizeAndDecompress(UncompressedSections.back()))
return createError(Name, std::move(E));
- Name = Name.substr(2); // Drop ".z"
Contents = UncompressedSections.back();
return Error::success();
}
@@ -494,7 +498,8 @@ Error handleSection(
return ContentsOrErr.takeError();
StringRef Contents = *ContentsOrErr;
- if (auto Err = handleCompressedSection(UncompressedSections, Name, Contents))
+ if (auto Err = handleCompressedSection(UncompressedSections, Section, Name,
+ Contents))
return Err;
Name = Name.substr(Name.find_first_not_of("._"));
diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index 27f63b9edcd0..7f4511258c64 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -36,8 +36,10 @@ using namespace llvm::codeview;
namespace {
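+// error(X) is wrapped in do { } while (false) so that "error(X);" expands to
+// a single statement and composes safely with unbraced if/else.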
#define error(X) \
- if (auto EC = X) \
- return EC;
+ do { \
+ if (auto EC = X) \
+ return EC; \
+ } while (false)
static const EnumEntry<TypeLeafKind> LeafTypeNames[] = {
#define CV_TYPE(enum, val) {#enum, enum},
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 2e567d8bc7ee..19d7d659a86a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1645,7 +1645,7 @@ class DWARFObjInMemory final : public DWARFObject {
/// provided by Data. Otherwise leaves it unchanged.
Error maybeDecompress(const object::SectionRef &Sec, StringRef Name,
StringRef &Data) {
- if (!Decompressor::isCompressed(Sec))
+ if (!Sec.isCompressed())
return Error::success();
Expected<Decompressor> Decompressor =
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 33856c12b3c9..d2ed4fe018b5 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -29,10 +29,6 @@
using namespace llvm;
using namespace dwarf;
-namespace llvm {
-class DwarfContext;
-}
-
using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
namespace {
diff --git a/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
index 9bc65e763287..aa8a89812227 100644
--- a/llvm/lib/DebugInfo/Symbolize/Markup.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Markup.cpp
@@ -100,6 +100,9 @@ Optional<MarkupNode> MarkupParser::nextNode() {
}
void MarkupParser::flush() {
+ Buffer.clear();
+ NextIdx = 0;
+ Line = {};
if (InProgressMultiline.empty())
return;
FinishedMultiline.swap(InProgressMultiline);
diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 3363fe5e531f..91a51485026e 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -10,14 +10,22 @@
/// This file defines the implementation of a filter that replaces symbolizer
/// markup with human-readable expressions.
///
+/// See https://llvm.org/docs/SymbolizerMarkupFormat.html
+///
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,30 +36,195 @@ MarkupFilter::MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled)
: OS(OS), ColorsEnabled(ColorsEnabled.value_or(
WithColor::defaultAutoDetectFunction()(OS))) {}
-void MarkupFilter::beginLine(StringRef Line) {
+void MarkupFilter::filter(StringRef Line) {
this->Line = Line;
resetColor();
+
+ Parser.parseLine(Line);
+ SmallVector<MarkupNode> DeferredNodes;
+ // See if the line is contextual (i.e. it contains a contextual element).
+ // In this case, anything after the contextual element is elided, or the whole
+ // line may be elided.
+ while (Optional<MarkupNode> Node = Parser.nextNode()) {
+ // If this was a contextual line, then summarily stop processing.
+ if (tryContextualElement(*Node, DeferredNodes))
+ return;
+ // This node may yet be part of an elided contextual line.
+ DeferredNodes.push_back(*Node);
+ }
+
+ // This was not a contextual line, so nothing in it should be elided.
+ endAnyModuleInfoLine();
+ for (const MarkupNode &Node : DeferredNodes)
+ filterNode(Node);
}
-void MarkupFilter::filter(const MarkupNode &Node) {
- if (!checkTag(Node))
- return;
+void MarkupFilter::finish() {
+ Parser.flush();
+ while (Optional<MarkupNode> Node = Parser.nextNode())
+ filterNode(*Node);
+ endAnyModuleInfoLine();
+ resetColor();
+ Modules.clear();
+ MMaps.clear();
+}
- if (trySGR(Node))
- return;
+// See if the given node is a contextual element and handle it if so. This may
+// either output or defer the element; in the former case, it will first emit
+// any DeferredNodes.
+//
+// Returns true if the given element was a contextual element. In this case,
+// DeferredNodes should be considered handled and should not be emitted. The
+// rest of the containing line must also be ignored in case the element was
+// deferred to a following line.
+bool MarkupFilter::tryContextualElement(
+ const MarkupNode &Node, const SmallVector<MarkupNode> &DeferredNodes) {
+ if (tryMMap(Node, DeferredNodes))
+ return true;
+ if (tryReset(Node, DeferredNodes))
+ return true;
+ return tryModule(Node, DeferredNodes);
+}
- if (Node.Tag == "symbol") {
- if (!checkNumFields(Node, 1))
- return;
+bool MarkupFilter::tryMMap(const MarkupNode &Node,
+ const SmallVector<MarkupNode> &DeferredNodes) {
+ if (Node.Tag != "mmap")
+ return false;
+ Optional<MMap> ParsedMMap = parseMMap(Node);
+ if (!ParsedMMap)
+ return true;
+
+ if (const MMap *M = overlappingMMap(*ParsedMMap)) {
+ WithColor::error(errs())
+ << formatv("overlapping mmap: #{0:x} [{1:x},{2:x})\n", M->Mod->ID,
+ M->Addr, M->Addr + M->Size);
+ reportLocation(Node.Fields[0].begin());
+ return true;
+ }
+
+ auto Res = MMaps.emplace(ParsedMMap->Addr, std::move(*ParsedMMap));
+ assert(Res.second && "Overlap check should ensure emplace succeeds.");
+ MMap &MMap = Res.first->second;
+
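+ // Consecutive mmap elements for the same module are folded into one module
+ // info line; an mmap for a different module ends the current line first.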
+ if (!MIL || MIL->Mod != MMap.Mod) {
+ endAnyModuleInfoLine();
+ for (const MarkupNode &Node : DeferredNodes)
+ filterNode(Node);
+ beginModuleInfoLine(MMap.Mod);
+ OS << "; adds";
+ }
+ MIL->MMaps.push_back(&MMap);
+ return true;
+}
+
+bool MarkupFilter::tryReset(const MarkupNode &Node,
+ const SmallVector<MarkupNode> &DeferredNodes) {
+ if (Node.Tag != "reset")
+ return false;
+ if (!checkNumFields(Node, 0))
+ return true;
+
+ if (!Modules.empty() || !MMaps.empty()) {
+ endAnyModuleInfoLine();
+ for (const MarkupNode &Node : DeferredNodes)
+ filterNode(Node);
highlight();
- OS << llvm::demangle(Node.Fields.front().str());
+ OS << "[[[reset]]]" << lineEnding();
restoreColor();
+
+ Modules.clear();
+ MMaps.clear();
+ }
+ return true;
+}
+
+bool MarkupFilter::tryModule(const MarkupNode &Node,
+ const SmallVector<MarkupNode> &DeferredNodes) {
+ if (Node.Tag != "module")
+ return false;
+ Optional<Module> ParsedModule = parseModule(Node);
+ if (!ParsedModule)
+ return true;
+
+ auto Res = Modules.try_emplace(
+ ParsedModule->ID, std::make_unique<Module>(std::move(*ParsedModule)));
+ if (!Res.second) {
+ WithColor::error(errs()) << "duplicate module ID\n";
+ reportLocation(Node.Fields[0].begin());
+ return true;
+ }
+ Module &Module = *Res.first->second;
+
+ endAnyModuleInfoLine();
+ for (const MarkupNode &Node : DeferredNodes)
+ filterNode(Node);
+ beginModuleInfoLine(&Module);
+ OS << "; BuildID=";
+ highlightValue();
+ OS << toHex(Module.BuildID, /*LowerCase=*/true);
+ highlight();
+ return true;
+}
+
+void MarkupFilter::beginModuleInfoLine(const Module *M) {
+ highlight();
+ OS << "[[[ELF module";
+ highlightValue();
+ OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name);
+ highlight();
+ MIL = ModuleInfoLine{M};
+}
+
+void MarkupFilter::endAnyModuleInfoLine() {
+ if (!MIL)
return;
+ llvm::stable_sort(MIL->MMaps, [](const MMap *A, const MMap *B) {
+ return A->Addr < B->Addr;
+ });
+ for (const MMap *M : MIL->MMaps) {
+ OS << (M == MIL->MMaps.front() ? ' ' : '-');
+ highlightValue();
+ OS << formatv("{0:x}", M->Addr);
+ highlight();
+ OS << '(';
+ highlightValue();
+ OS << M->Mode;
+ highlight();
+ OS << ')';
}
+ OS << "]]]" << lineEnding();
+ restoreColor();
+ MIL.reset();
+}
+
+// Handle a node that is known not to be a contextual element.
+void MarkupFilter::filterNode(const MarkupNode &Node) {
+ if (!checkTag(Node))
+ return;
+ if (tryPresentation(Node))
+ return;
+ if (trySGR(Node))
+ return;
OS << Node.Text;
}
+bool MarkupFilter::tryPresentation(const MarkupNode &Node) {
+ return trySymbol(Node);
+}
+
+bool MarkupFilter::trySymbol(const MarkupNode &Node) {
+ if (Node.Tag != "symbol")
+ return false;
+ if (!checkNumFields(Node, 1))
+ return true;
+
+ highlight();
+ OS << llvm::demangle(Node.Fields.front().str());
+ restoreColor();
+ return true;
+}
+
bool MarkupFilter::trySGR(const MarkupNode &Node) {
if (Node.Text == "\033[0m") {
resetColor();
@@ -93,6 +266,13 @@ void MarkupFilter::highlight() {
Bold);
}
+// Begin highlighting a field within a highlighted markup string.
+void MarkupFilter::highlightValue() {
+ if (!ColorsEnabled)
+ return;
+ OS.changeColor(raw_ostream::Colors::GREEN, Bold);
+}
+
// Set the output stream's color to the current color and bold state of the SGR
// abstract machine.
void MarkupFilter::restoreColor() {
@@ -117,6 +297,139 @@ void MarkupFilter::resetColor() {
OS.resetColor();
}
+// This macro reduces the Optional boilerplate below: when a parse helper
+// returns None, the caller almost always just returns None itself.
+#define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR) \
+ auto NAME##Opt = (EXPR); \
+ if (!NAME##Opt) \
+ return None; \
+ TYPE NAME = std::move(*NAME##Opt)
+
+Optional<MarkupFilter::Module>
+MarkupFilter::parseModule(const MarkupNode &Element) const {
+ if (!checkNumFieldsAtLeast(Element, 3))
+ return None;
+ ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[0]));
+ StringRef Name = Element.Fields[1];
+ StringRef Type = Element.Fields[2];
+ if (Type != "elf") {
+ WithColor::error() << "unknown module type\n";
+ reportLocation(Type.begin());
+ return None;
+ }
+ if (!checkNumFields(Element, 4))
+ return None;
+ ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID,
+ parseBuildID(Element.Fields[3]));
+ return Module{ID, Name.str(), std::move(BuildID)};
+}
+
+Optional<MarkupFilter::MMap>
+MarkupFilter::parseMMap(const MarkupNode &Element) const {
+ if (!checkNumFieldsAtLeast(Element, 3))
+ return None;
+ ASSIGN_OR_RETURN_NONE(uint64_t, Addr, parseAddr(Element.Fields[0]));
+ ASSIGN_OR_RETURN_NONE(uint64_t, Size, parseSize(Element.Fields[1]));
+ StringRef Type = Element.Fields[2];
+ if (Type != "load") {
+ WithColor::error() << "unknown mmap type\n";
+ reportLocation(Type.begin());
+ return None;
+ }
+ if (!checkNumFields(Element, 6))
+ return None;
+ ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3]));
+ ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4]));
+ auto It = Modules.find(ID);
+ if (It == Modules.end()) {
+ WithColor::error() << "unknown module ID\n";
+ reportLocation(Element.Fields[3].begin());
+ return None;
+ }
+ ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr,
+ parseAddr(Element.Fields[5]));
+ return MMap{Addr, Size, It->second.get(), std::move(Mode),
+ ModuleRelativeAddr};
+}
+
+// Parse an address (%p in the spec).
+Optional<uint64_t> MarkupFilter::parseAddr(StringRef Str) const {
+ if (Str.empty()) {
+ reportTypeError(Str, "address");
+ return None;
+ }
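+ // A bare run of zeros (e.g. "0" or "0000") is accepted as address 0 without
+ // requiring the 0x prefix.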
+ if (all_of(Str, [](char C) { return C == '0'; }))
+ return 0;
+ if (!Str.startswith("0x")) {
+ reportTypeError(Str, "address");
+ return None;
+ }
+ uint64_t Addr;
+ if (Str.drop_front(2).getAsInteger(16, Addr)) {
+ reportTypeError(Str, "address");
+ return None;
+ }
+ return Addr;
+}
+
+// Parse a module ID (%i in the spec).
+Optional<uint64_t> MarkupFilter::parseModuleID(StringRef Str) const {
+ uint64_t ID;
+ if (Str.getAsInteger(0, ID)) {
+ reportTypeError(Str, "module ID");
+ return None;
+ }
+ return ID;
+}
+
+// Parse a size (%i in the spec).
+Optional<uint64_t> MarkupFilter::parseSize(StringRef Str) const {
+ uint64_t ID;
+ if (Str.getAsInteger(0, ID)) {
+ reportTypeError(Str, "size");
+ return None;
+ }
+ return ID;
+}
+
+// Parse a build ID (%x in the spec).
+Optional<SmallVector<uint8_t>> MarkupFilter::parseBuildID(StringRef Str) const {
+ std::string Bytes;
+ if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) {
+ reportTypeError(Str, "build ID");
+ return None;
+ }
+ ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
+ Bytes.size());
+ return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+}
+
+// Parses the mode string for an mmap element.
+Optional<std::string> MarkupFilter::parseMode(StringRef Str) const {
+ if (Str.empty()) {
+ reportTypeError(Str, "mode");
+ return None;
+ }
+
+ // Pop off each of r/R, w/W, and x/X from the front, in that order.
+ StringRef Remainder = Str;
+ if (!Remainder.empty() && tolower(Remainder.front()) == 'r')
+ Remainder = Remainder.drop_front();
+ if (!Remainder.empty() && tolower(Remainder.front()) == 'w')
+ Remainder = Remainder.drop_front();
+ if (!Remainder.empty() && tolower(Remainder.front()) == 'x')
+ Remainder = Remainder.drop_front();
+
+ // If anything remains, then the string wasn't a mode.
+ if (!Remainder.empty()) {
+ reportTypeError(Str, "mode");
+ return None;
+ }
+
+ // Normalize the mode.
+ return Str.lower();
+}
+
bool MarkupFilter::checkTag(const MarkupNode &Node) const {
if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) {
WithColor::error(errs()) << "tags must be all lowercase characters\n";
@@ -126,18 +439,66 @@ bool MarkupFilter::checkTag(const MarkupNode &Node) const {
return true;
}
-bool MarkupFilter::checkNumFields(const MarkupNode &Node, size_t Size) const {
- if (Node.Fields.size() != Size) {
+bool MarkupFilter::checkNumFields(const MarkupNode &Element,
+ size_t Size) const {
+ if (Element.Fields.size() != Size) {
WithColor::error(errs()) << "expected " << Size << " fields; found "
- << Node.Fields.size() << "\n";
- reportLocation(Node.Tag.end());
+ << Element.Fields.size() << "\n";
+ reportLocation(Element.Tag.end());
return false;
}
return true;
}
+bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element,
+ size_t Size) const {
+ if (Element.Fields.size() < Size) {
+ WithColor::error(errs())
+ << "expected at least " << Size << " fields; found "
+ << Element.Fields.size() << "\n";
+ reportLocation(Element.Tag.end());
+ return false;
+ }
+ return true;
+}
+
+void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const {
+ WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str
+ << "'\n";
+ reportLocation(Str.begin());
+}
+
+// Prints two lines that point out the given location in the current Line using
+// a caret. The iterator must be within the bounds of the most recent line
+// passed to filter().
void MarkupFilter::reportLocation(StringRef::iterator Loc) const {
errs() << Line;
WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^';
errs() << '\n';
}
+
+// Checks for an existing mmap that overlaps the given one and returns a
+// pointer to one of them.
+const MarkupFilter::MMap *MarkupFilter::overlappingMMap(const MMap &Map) const {
+ // If the given map contains the start of another mmap, they overlap.
+ auto I = MMaps.upper_bound(Map.Addr);
+ if (I != MMaps.end() && Map.contains(I->second.Addr))
+ return &I->second;
+
+ // If no element starts inside the given mmap, the only possible overlap would
+ // be if the preceding mmap contains the start point of the given mmap.
+ if (I != MMaps.begin()) {
+ --I;
+ if (I->second.contains(Map.Addr))
+ return &I->second;
+ }
+ return nullptr;
+}
+
+StringRef MarkupFilter::lineEnding() const {
+ return Line.endswith("\r\n") ? "\r\n" : "\n";
+}
+
+bool MarkupFilter::MMap::contains(uint64_t Addr) const {
+ return this->Addr <= Addr && Addr < this->Addr + Size;
+}
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
index 43b9c2ba400b..dc07eaeaf615 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
@@ -524,4 +524,4 @@ COFFLinkGraphBuilder::exportCOMDATSymbol(COFFSymbolIndex SymIndex,
}
} // namespace jitlink
-} // namespace llvm
\ No newline at end of file
+} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
index 0fc366bf505f..2a60d8206f63 100644
--- a/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp
@@ -107,7 +107,7 @@ Error DWARFRecordSectionSplitter::processBlock(
}
uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset;
- auto &NewBlock = G.splitBlock(B, BlockSize);
+ auto &NewBlock = G.splitBlock(B, BlockSize, &Cache);
(void)NewBlock;
LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n");
}
diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index c60f4b3b263c..70a3c404d836 100644
--- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -48,6 +48,10 @@ JITTargetMachineBuilder::createTargetMachine() {
if (!TheTarget)
return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+ if (!TheTarget->hasJIT())
+ return make_error<StringError>("Target has no JIT support",
+ inconvertibleErrorCode());
+
auto *TM =
TheTarget->createTargetMachine(TT.getTriple(), CPU, Features.getString(),
Options, RM, CM, OptLevel, /*JIT*/ true);
diff --git a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp
new file mode 100644
index 000000000000..c2e7baabb994
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp
@@ -0,0 +1,135 @@
+//=== MapperJITLinkMemoryManager.cpp - Memory management with MemoryMapper ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Support/Process.h"
+
+#include <limits>
+
+using namespace llvm::jitlink;
+
+namespace llvm {
+namespace orc {
+
+class MapperJITLinkMemoryManager::InFlightAlloc
+ : public JITLinkMemoryManager::InFlightAlloc {
+public:
+ InFlightAlloc(MapperJITLinkMemoryManager &Parent, LinkGraph &G,
+ ExecutorAddr AllocAddr,
+ std::vector<MemoryMapper::AllocInfo::SegInfo> Segs)
+ : Parent(Parent), G(G), AllocAddr(AllocAddr), Segs(std::move(Segs)) {}
+
+ void finalize(OnFinalizedFunction OnFinalize) override {
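+ // Hand the prepared segments and the graph's allocation actions to the
+ // mapper; the finalized allocation is identified by its base address.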
+ MemoryMapper::AllocInfo AI;
+ AI.MappingBase = AllocAddr;
+
+ std::swap(AI.Segments, Segs);
+ std::swap(AI.Actions, G.allocActions());
+
+ Parent.Mapper->initialize(AI, [&](Expected<ExecutorAddr> Result) {
+ if (!Result) {
+ OnFinalize(Result.takeError());
+ return;
+ }
+
+ OnFinalize(FinalizedAlloc(*Result));
+ });
+ }
+
+ void abandon(OnAbandonedFunction OnFinalize) override {
+ Parent.Mapper->release({AllocAddr}, std::move(OnFinalize));
+ }
+
+private:
+ MapperJITLinkMemoryManager &Parent;
+ LinkGraph &G;
+ ExecutorAddr AllocAddr;
+ std::vector<MemoryMapper::AllocInfo::SegInfo> Segs;
+};
+
+MapperJITLinkMemoryManager::MapperJITLinkMemoryManager(
+ std::unique_ptr<MemoryMapper> Mapper)
+ : Mapper(std::move(Mapper)) {}
+
+void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G,
+ OnAllocatedFunction OnAllocated) {
+ BasicLayout BL(G);
+
+  // Find the required address space.
+ auto SegsSizes = BL.getContiguousPageBasedLayoutSizes(Mapper->getPageSize());
+ if (!SegsSizes) {
+ OnAllocated(SegsSizes.takeError());
+ return;
+ }
+
+ // Check if total size fits in address space
+ if (SegsSizes->total() > std::numeric_limits<size_t>::max()) {
+ OnAllocated(make_error<JITLinkError>(
+ formatv("Total requested size {:x} for graph {} exceeds address space",
+ SegsSizes->total(), G.getName())));
+ return;
+ }
+
+ Mapper->reserve(
+ SegsSizes->total(),
+ [this, &G, BL = std::move(BL), OnAllocated = std::move(OnAllocated)](
+ Expected<ExecutorAddrRange> Result) mutable {
+ if (!Result) {
+ return OnAllocated(Result.takeError());
+ }
+
+ auto NextSegAddr = Result->Start;
+
+ std::vector<MemoryMapper::AllocInfo::SegInfo> SegInfos;
+
+ for (auto &KV : BL.segments()) {
+ auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ auto TotalSize = Seg.ContentSize + Seg.ZeroFillSize;
+
+ Seg.Addr = NextSegAddr;
+ Seg.WorkingMem = Mapper->prepare(NextSegAddr, TotalSize);
+
+ NextSegAddr += alignTo(TotalSize, Mapper->getPageSize());
+
+ MemoryMapper::AllocInfo::SegInfo SI;
+ SI.Offset = Seg.Addr - Result->Start;
+ SI.ContentSize = Seg.ContentSize;
+ SI.ZeroFillSize = Seg.ZeroFillSize;
+        SI.Prot = toSysMemoryProtectionFlags(AG.getMemProt());
+ SI.WorkingMem = Seg.WorkingMem;
+
+ SegInfos.push_back(SI);
+ }
+
+ if (auto Err = BL.apply()) {
+ OnAllocated(std::move(Err));
+ return;
+ }
+
+ OnAllocated(std::make_unique<InFlightAlloc>(*this, G, Result->Start,
+ std::move(SegInfos)));
+ });
+}
+
+void MapperJITLinkMemoryManager::deallocate(
+ std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) {
+ std::vector<ExecutorAddr> Bases;
+ Bases.reserve(Allocs.size());
+ for (auto &FA : Allocs) {
+ Bases.push_back(FA.getAddress());
+ FA.release();
+ }
+ Mapper->release(Bases, std::move(OnDeallocated));
+}
+
+} // end namespace orc
+} // end namespace llvm
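Usage sketch (not part of the change above), assuming the headers shown and the InProcessMemoryMapper::Create() factory added in MemoryMapper.cpp below; error handling is left to the caller, and any other MemoryMapper, such as the SharedMemoryMapper below, can be substituted:

    #include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h"
    #include "llvm/ExecutionEngine/Orc/MemoryMapper.h"

    #include <memory>

    using namespace llvm;

    // Build a JITLink memory manager backed by a MemoryMapper. The result can
    // be handed to an ObjectLinkingLayer in place of InProcessMemoryManager.
    static Expected<std::unique_ptr<orc::MapperJITLinkMemoryManager>>
    createMapperBackedMemoryManager() {
      auto Mapper = orc::InProcessMemoryMapper::Create();
      if (!Mapper)
        return Mapper.takeError();
      return std::make_unique<orc::MapperJITLinkMemoryManager>(
          std::move(*Mapper));
    }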
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index 8b3fbd7117e2..ca3f64b8a409 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -8,11 +8,33 @@
#include "llvm/ExecutionEngine/Orc/MemoryMapper.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/WindowsError.h"
+
+#if defined(LLVM_ON_UNIX)
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#elif defined(_WIN32)
+#include <windows.h>
+#endif
+
namespace llvm {
namespace orc {
MemoryMapper::~MemoryMapper() {}
+InProcessMemoryMapper::InProcessMemoryMapper(size_t PageSize)
+ : PageSize(PageSize) {}
+
+Expected<std::unique_ptr<InProcessMemoryMapper>>
+InProcessMemoryMapper::Create() {
+ auto PageSize = sys::Process::getPageSize();
+ if (!PageSize)
+ return PageSize.takeError();
+ return std::make_unique<InProcessMemoryMapper>(*PageSize);
+}
+
void InProcessMemoryMapper::reserve(size_t NumBytes,
OnReservedFunction OnReserved) {
std::error_code EC;
@@ -147,6 +169,238 @@ InProcessMemoryMapper::~InProcessMemoryMapper() {
cantFail(F.get());
}
+// SharedMemoryMapper
+
+SharedMemoryMapper::SharedMemoryMapper(ExecutorProcessControl &EPC,
+ SymbolAddrs SAs, size_t PageSize)
+ : EPC(EPC), SAs(SAs), PageSize(PageSize) {}
+
+Expected<std::unique_ptr<SharedMemoryMapper>>
+SharedMemoryMapper::Create(ExecutorProcessControl &EPC, SymbolAddrs SAs) {
+ auto PageSize = sys::Process::getPageSize();
+ if (!PageSize)
+ return PageSize.takeError();
+
+ return std::make_unique<SharedMemoryMapper>(EPC, SAs, *PageSize);
+}
+
+void SharedMemoryMapper::reserve(size_t NumBytes,
+ OnReservedFunction OnReserved) {
+#if defined(LLVM_ON_UNIX) || defined(_WIN32)
+
+ EPC.callSPSWrapperAsync<
+ rt::SPSExecutorSharedMemoryMapperServiceReserveSignature>(
+ SAs.Reserve,
+ [this, NumBytes, OnReserved = std::move(OnReserved)](
+ Error SerializationErr,
+ Expected<std::pair<ExecutorAddr, std::string>> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnReserved(std::move(SerializationErr));
+ }
+
+ if (!Result)
+ return OnReserved(Result.takeError());
+
+ ExecutorAddr RemoteAddr;
+ std::string SharedMemoryName;
+ std::tie(RemoteAddr, SharedMemoryName) = std::move(*Result);
+
+ void *LocalAddr = nullptr;
+
+#if defined(LLVM_ON_UNIX)
+
+ int SharedMemoryFile = shm_open(SharedMemoryName.c_str(), O_RDWR, 0700);
+ if (SharedMemoryFile < 0) {
+ return OnReserved(errorCodeToError(
+ std::error_code(errno, std::generic_category())));
+ }
+
+          // This prevents other processes from accessing it by name.
+ shm_unlink(SharedMemoryName.c_str());
+
+ LocalAddr = mmap(nullptr, NumBytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ SharedMemoryFile, 0);
+ if (LocalAddr == MAP_FAILED) {
+ return OnReserved(errorCodeToError(
+ std::error_code(errno, std::generic_category())));
+ }
+
+ close(SharedMemoryFile);
+
+#elif defined(_WIN32)
+
+ std::wstring WideSharedMemoryName(SharedMemoryName.begin(),
+ SharedMemoryName.end());
+ HANDLE SharedMemoryFile = OpenFileMappingW(
+ FILE_MAP_ALL_ACCESS, FALSE, WideSharedMemoryName.c_str());
+ if (!SharedMemoryFile)
+ return OnReserved(errorCodeToError(mapWindowsError(GetLastError())));
+
+ LocalAddr =
+ MapViewOfFile(SharedMemoryFile, FILE_MAP_ALL_ACCESS, 0, 0, 0);
+ if (!LocalAddr) {
+ CloseHandle(SharedMemoryFile);
+ return OnReserved(errorCodeToError(mapWindowsError(GetLastError())));
+ }
+
+ CloseHandle(SharedMemoryFile);
+
+#endif
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Reservations.insert({RemoteAddr, {LocalAddr, NumBytes}});
+ }
+
+ OnReserved(ExecutorAddrRange(RemoteAddr, NumBytes));
+ },
+ SAs.Instance, static_cast<uint64_t>(NumBytes));
+
+#else
+ OnReserved(make_error<StringError>(
+ "SharedMemoryMapper is not supported on this platform yet",
+ inconvertibleErrorCode()));
+#endif
+}
+
+char *SharedMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) {
+ auto R = Reservations.upper_bound(Addr);
+ assert(R != Reservations.begin() && "Attempt to prepare unknown range");
+ R--;
+
+ ExecutorAddrDiff Offset = Addr - R->first;
+
+ return static_cast<char *>(R->second.LocalAddr) + Offset;
+}
+
+void SharedMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
+ OnInitializedFunction OnInitialized) {
+ auto Reservation = Reservations.find(AI.MappingBase);
+ assert(Reservation != Reservations.end() &&
+ "Attempt to initialize unreserved range");
+
+ tpctypes::SharedMemoryFinalizeRequest FR;
+
+ AI.Actions.swap(FR.Actions);
+
+ FR.Segments.reserve(AI.Segments.size());
+
+ for (auto Segment : AI.Segments) {
+ char *Base =
+ static_cast<char *>(Reservation->second.LocalAddr) + Segment.Offset;
+ std::memset(Base + Segment.ContentSize, 0, Segment.ZeroFillSize);
+
+ tpctypes::SharedMemorySegFinalizeRequest SegReq;
+ SegReq.Prot = tpctypes::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(Segment.Prot));
+ SegReq.Addr = AI.MappingBase + Segment.Offset;
+ SegReq.Size = Segment.ContentSize + Segment.ZeroFillSize;
+
+ FR.Segments.push_back(SegReq);
+ }
+
+ EPC.callSPSWrapperAsync<
+ rt::SPSExecutorSharedMemoryMapperServiceInitializeSignature>(
+ SAs.Initialize,
+ [OnInitialized = std::move(OnInitialized)](
+ Error SerializationErr, Expected<ExecutorAddr> Result) mutable {
+ if (SerializationErr) {
+ cantFail(Result.takeError());
+ return OnInitialized(std::move(SerializationErr));
+ }
+
+ OnInitialized(std::move(Result));
+ },
+ SAs.Instance, AI.MappingBase, std::move(FR));
+}
+
+void SharedMemoryMapper::deinitialize(
+ ArrayRef<ExecutorAddr> Allocations,
+ MemoryMapper::OnDeinitializedFunction OnDeinitialized) {
+ EPC.callSPSWrapperAsync<
+ rt::SPSExecutorSharedMemoryMapperServiceDeinitializeSignature>(
+ SAs.Deinitialize,
+ [OnDeinitialized = std::move(OnDeinitialized)](Error SerializationErr,
+ Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnDeinitialized(std::move(SerializationErr));
+ }
+
+ OnDeinitialized(std::move(Result));
+ },
+ SAs.Instance, Allocations);
+}
+
+void SharedMemoryMapper::release(ArrayRef<ExecutorAddr> Bases,
+ OnReleasedFunction OnReleased) {
+#if defined(LLVM_ON_UNIX) || defined(_WIN32)
+ Error Err = Error::success();
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ for (auto Base : Bases) {
+
+#if defined(LLVM_ON_UNIX)
+
+ if (munmap(Reservations[Base].LocalAddr, Reservations[Base].Size) != 0)
+ Err = joinErrors(std::move(Err), errorCodeToError(std::error_code(
+ errno, std::generic_category())));
+
+#elif defined(_WIN32)
+
+ if (!UnmapViewOfFile(Reservations[Base].LocalAddr))
+        Err = joinErrors(std::move(Err),
+                         errorCodeToError(mapWindowsError(GetLastError())));
+
+#endif
+
+ Reservations.erase(Base);
+ }
+ }
+
+ EPC.callSPSWrapperAsync<
+ rt::SPSExecutorSharedMemoryMapperServiceReleaseSignature>(
+ SAs.Release,
+ [OnReleased = std::move(OnReleased),
+ Err = std::move(Err)](Error SerializationErr, Error Result) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(Result));
+ return OnReleased(
+ joinErrors(std::move(Err), std::move(SerializationErr)));
+ }
+
+ return OnReleased(joinErrors(std::move(Err), std::move(Result)));
+ },
+ SAs.Instance, Bases);
+#else
+ OnReleased(make_error<StringError>(
+ "SharedMemoryMapper is not supported on this platform yet",
+ inconvertibleErrorCode()));
+#endif
+}
+
+SharedMemoryMapper::~SharedMemoryMapper() {
+ std::vector<ExecutorAddr> ReservationAddrs;
+ if (!Reservations.empty()) {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ {
+ ReservationAddrs.reserve(Reservations.size());
+ for (const auto &R : Reservations) {
+ ReservationAddrs.push_back(R.first);
+ }
+ }
+ }
+
+ std::promise<MSVCPError> P;
+ auto F = P.get_future();
+ release(ReservationAddrs, [&](Error Err) { P.set_value(std::move(Err)); });
+ // FIXME: Release can actually fail. The error should be propagated.
+ // Meanwhile, a better option is to explicitly call release().
+ cantFail(F.get());
+}
+
} // namespace orc
} // namespace llvm
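Controller-side construction sketch (not part of the change above). The SymbolAddrs fields and the rt:: wrapper names come from this patch; ExecutorProcessControl::getBootstrapSymbols is assumed to be the existing lookup helper used by the other EPCGeneric* clients:

    #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
    #include "llvm/ExecutionEngine/Orc/MemoryMapper.h"
    #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"

    using namespace llvm;
    using namespace llvm::orc;

    // Resolve the executor-side service instance and wrapper functions, then
    // create the mapper that calls them through the EPC.
    static Expected<std::unique_ptr<SharedMemoryMapper>>
    createSharedMemoryMapper(ExecutorProcessControl &EPC) {
      SharedMemoryMapper::SymbolAddrs SAs;
      if (auto Err = EPC.getBootstrapSymbols(
              {{SAs.Instance, rt::ExecutorSharedMemoryMapperServiceInstanceName},
               {SAs.Reserve,
                rt::ExecutorSharedMemoryMapperServiceReserveWrapperName},
               {SAs.Initialize,
                rt::ExecutorSharedMemoryMapperServiceInitializeWrapperName},
               {SAs.Deinitialize,
                rt::ExecutorSharedMemoryMapperServiceDeinitializeWrapperName},
               {SAs.Release,
                rt::ExecutorSharedMemoryMapperServiceReleaseWrapperName}}))
        return std::move(Err);
      return SharedMemoryMapper::Create(EPC, SAs);
    }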
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
index 5eae33121eb9..dfdd846c46a7 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -18,6 +18,7 @@ const char *SimpleExecutorDylibManagerOpenWrapperName =
"__llvm_orc_SimpleExecutorDylibManager_open_wrapper";
const char *SimpleExecutorDylibManagerLookupWrapperName =
"__llvm_orc_SimpleExecutorDylibManager_lookup_wrapper";
+
const char *SimpleExecutorMemoryManagerInstanceName =
"__llvm_orc_SimpleExecutorMemoryManager_Instance";
const char *SimpleExecutorMemoryManagerReserveWrapperName =
@@ -26,6 +27,18 @@ const char *SimpleExecutorMemoryManagerFinalizeWrapperName =
"__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper";
const char *SimpleExecutorMemoryManagerDeallocateWrapperName =
"__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper";
+
+const char *ExecutorSharedMemoryMapperServiceInstanceName =
+ "__llvm_orc_ExecutorSharedMemoryMapperService_Instance";
+const char *ExecutorSharedMemoryMapperServiceReserveWrapperName =
+ "__llvm_orc_ExecutorSharedMemoryMapperService_Reserve";
+const char *ExecutorSharedMemoryMapperServiceInitializeWrapperName =
+ "__llvm_orc_ExecutorSharedMemoryMapperService_Initialize";
+const char *ExecutorSharedMemoryMapperServiceDeinitializeWrapperName =
+ "__llvm_orc_ExecutorSharedMemoryMapperService_Deinitialize";
+const char *ExecutorSharedMemoryMapperServiceReleaseWrapperName =
+ "__llvm_orc_ExecutorSharedMemoryMapperService_Release";
+
const char *MemoryWriteUInt8sWrapperName =
"__llvm_orc_bootstrap_mem_write_uint8s_wrapper";
const char *MemoryWriteUInt16sWrapperName =
@@ -36,10 +49,12 @@ const char *MemoryWriteUInt64sWrapperName =
"__llvm_orc_bootstrap_mem_write_uint64s_wrapper";
const char *MemoryWriteBuffersWrapperName =
"__llvm_orc_bootstrap_mem_write_buffers_wrapper";
+
const char *RegisterEHFrameSectionWrapperName =
"__llvm_orc_bootstrap_register_ehframe_section_wrapper";
const char *DeregisterEHFrameSectionWrapperName =
"__llvm_orc_bootstrap_deregister_ehframe_section_wrapper";
+
const char *RunAsMainWrapperName = "__llvm_orc_bootstrap_run_as_main_wrapper";
} // end namespace rt
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
new file mode 100644
index 000000000000..6c9f099061ae
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
@@ -0,0 +1,341 @@
+//===---------- ExecutorSharedMemoryMapperService.cpp -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/WindowsError.h"
+
+#include <sstream>
+
+#if defined(LLVM_ON_UNIX)
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+#if defined(_WIN32)
+static DWORD getWindowsProtectionFlags(unsigned Flags) {
+ switch (Flags & llvm::sys::Memory::MF_RWE_MASK) {
+ case llvm::sys::Memory::MF_READ:
+ return PAGE_READONLY;
+ case llvm::sys::Memory::MF_WRITE:
+ // Note: PAGE_WRITE is not supported by VirtualProtect
+ return PAGE_READWRITE;
+ case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE:
+ return PAGE_READWRITE;
+ case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_EXEC:
+ return PAGE_EXECUTE_READ;
+ case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE |
+ llvm::sys::Memory::MF_EXEC:
+ return PAGE_EXECUTE_READWRITE;
+ case llvm::sys::Memory::MF_EXEC:
+ return PAGE_EXECUTE;
+ default:
+ llvm_unreachable("Illegal memory protection flag specified!");
+ }
+ // Provide a default return value as required by some compilers.
+ return PAGE_NOACCESS;
+}
+#endif
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+Expected<std::pair<ExecutorAddr, std::string>>
+ExecutorSharedMemoryMapperService::reserve(uint64_t Size) {
+#if defined(LLVM_ON_UNIX) || defined(_WIN32)
+
+#if defined(LLVM_ON_UNIX)
+
+ std::string SharedMemoryName;
+ {
+ std::stringstream SharedMemoryNameStream;
+ SharedMemoryNameStream << "/jitlink_" << sys::Process::getProcessId() << '_'
+ << (++SharedMemoryCount);
+ SharedMemoryName = SharedMemoryNameStream.str();
+ }
+
+ int SharedMemoryFile =
+ shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT | O_EXCL, 0700);
+ if (SharedMemoryFile < 0)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+  // The shared memory object is created with size 0; grow it to Size.
+ if (ftruncate(SharedMemoryFile, Size) < 0)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+ void *Addr = mmap(nullptr, Size, PROT_NONE, MAP_SHARED, SharedMemoryFile, 0);
+ if (Addr == MAP_FAILED)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+ close(SharedMemoryFile);
+
+#elif defined(_WIN32)
+
+ std::string SharedMemoryName;
+ {
+ std::stringstream SharedMemoryNameStream;
+ SharedMemoryNameStream << "jitlink_" << sys::Process::getProcessId() << '_'
+ << (++SharedMemoryCount);
+ SharedMemoryName = SharedMemoryNameStream.str();
+ }
+
+ std::wstring WideSharedMemoryName(SharedMemoryName.begin(),
+ SharedMemoryName.end());
+ HANDLE SharedMemoryFile = CreateFileMappingW(
+ INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, Size >> 32,
+ Size & 0xffffffff, WideSharedMemoryName.c_str());
+ if (!SharedMemoryFile)
+ return errorCodeToError(mapWindowsError(GetLastError()));
+
+ void *Addr = MapViewOfFile(SharedMemoryFile,
+ FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, 0, 0, 0);
+ if (!Addr) {
+ CloseHandle(SharedMemoryFile);
+ return errorCodeToError(mapWindowsError(GetLastError()));
+ }
+
+#endif
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Reservations[Addr].Size = Size;
+#if defined(_WIN32)
+ Reservations[Addr].SharedMemoryFile = SharedMemoryFile;
+#endif
+ }
+
+ return std::make_pair(ExecutorAddr::fromPtr(Addr),
+ std::move(SharedMemoryName));
+#else
+ return make_error<StringError>(
+ "SharedMemoryMapper is not supported on this platform yet",
+ inconvertibleErrorCode());
+#endif
+}
+
+Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
+ ExecutorAddr Reservation, tpctypes::SharedMemoryFinalizeRequest &FR) {
+#if defined(LLVM_ON_UNIX) || defined(_WIN32)
+
+ ExecutorAddr MinAddr(~0ULL);
+
+ // Contents are already in place
+ for (auto &Segment : FR.Segments) {
+ if (Segment.Addr < MinAddr)
+ MinAddr = Segment.Addr;
+
+#if defined(LLVM_ON_UNIX)
+
+ int NativeProt = 0;
+ if (Segment.Prot & tpctypes::WPF_Read)
+ NativeProt |= PROT_READ;
+ if (Segment.Prot & tpctypes::WPF_Write)
+ NativeProt |= PROT_WRITE;
+ if (Segment.Prot & tpctypes::WPF_Exec)
+ NativeProt |= PROT_EXEC;
+
+ if (mprotect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt))
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+#elif defined(_WIN32)
+
+ DWORD NativeProt =
+ getWindowsProtectionFlags(fromWireProtectionFlags(Segment.Prot));
+
+ if (!VirtualProtect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt,
+ &NativeProt))
+ return errorCodeToError(mapWindowsError(GetLastError()));
+
+#endif
+
+ if (Segment.Prot & tpctypes::WPF_Exec)
+ sys::Memory::InvalidateInstructionCache(Segment.Addr.toPtr<void *>(),
+ Segment.Size);
+ }
+
+  // Run finalization actions and get the deinitialization action list.
+ auto DeinitializeActions = shared::runFinalizeActions(FR.Actions);
+ if (!DeinitializeActions) {
+ return DeinitializeActions.takeError();
+ }
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Allocations[MinAddr].DeinitializationActions =
+ std::move(*DeinitializeActions);
+ Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr);
+ }
+
+ return MinAddr;
+
+#else
+ return make_error<StringError>(
+ "SharedMemoryMapper is not supported on this platform yet",
+ inconvertibleErrorCode());
+#endif
+}
+
+Error ExecutorSharedMemoryMapperService::deinitialize(
+ const std::vector<ExecutorAddr> &Bases) {
+ Error AllErr = Error::success();
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+
+ for (auto Base : Bases) {
+ if (Error Err = shared::runDeallocActions(
+ Allocations[Base].DeinitializationActions)) {
+ AllErr = joinErrors(std::move(AllErr), std::move(Err));
+ }
+
+ Allocations.erase(Base);
+ }
+ }
+
+ return AllErr;
+}
+
+Error ExecutorSharedMemoryMapperService::release(
+ const std::vector<ExecutorAddr> &Bases) {
+#if defined(LLVM_ON_UNIX) || defined(_WIN32)
+ Error Err = Error::success();
+
+ for (auto Base : Bases) {
+ std::vector<ExecutorAddr> AllocAddrs;
+ size_t Size;
+
+#if defined(_WIN32)
+ HANDLE SharedMemoryFile;
+#endif
+
+ {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ auto &R = Reservations[Base.toPtr<void *>()];
+ Size = R.Size;
+
+#if defined(_WIN32)
+ SharedMemoryFile = R.SharedMemoryFile;
+#endif
+
+ AllocAddrs.swap(R.Allocations);
+ }
+
+    // Deinitialize sub-allocations.
+ if (Error E = deinitialize(AllocAddrs))
+ Err = joinErrors(std::move(Err), std::move(E));
+
+#if defined(LLVM_ON_UNIX)
+
+ if (munmap(Base.toPtr<void *>(), Size) != 0)
+ Err = joinErrors(std::move(Err), errorCodeToError(std::error_code(
+ errno, std::generic_category())));
+
+#elif defined(_WIN32)
+
+ if (!UnmapViewOfFile(Base.toPtr<void *>()))
+ Err = joinErrors(std::move(Err),
+ errorCodeToError(mapWindowsError(GetLastError())));
+
+ CloseHandle(SharedMemoryFile);
+
+#endif
+
+ std::lock_guard<std::mutex> Lock(Mutex);
+ Reservations.erase(Base.toPtr<void *>());
+ }
+
+ return Err;
+#else
+ return make_error<StringError>(
+ "SharedMemoryMapper is not supported on this platform yet",
+ inconvertibleErrorCode());
+#endif
+}
+
+Error ExecutorSharedMemoryMapperService::shutdown() {
+ std::vector<ExecutorAddr> ReservationAddrs;
+ if (!Reservations.empty()) {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ {
+ ReservationAddrs.reserve(Reservations.size());
+ for (const auto &R : Reservations) {
+ ReservationAddrs.push_back(ExecutorAddr::fromPtr(R.getFirst()));
+ }
+ }
+ }
+  return release(ReservationAddrs);
+}
+
+void ExecutorSharedMemoryMapperService::addBootstrapSymbols(
+ StringMap<ExecutorAddr> &M) {
+ M[rt::ExecutorSharedMemoryMapperServiceInstanceName] =
+ ExecutorAddr::fromPtr(this);
+ M[rt::ExecutorSharedMemoryMapperServiceReserveWrapperName] =
+ ExecutorAddr::fromPtr(&reserveWrapper);
+ M[rt::ExecutorSharedMemoryMapperServiceInitializeWrapperName] =
+ ExecutorAddr::fromPtr(&initializeWrapper);
+ M[rt::ExecutorSharedMemoryMapperServiceDeinitializeWrapperName] =
+ ExecutorAddr::fromPtr(&deinitializeWrapper);
+ M[rt::ExecutorSharedMemoryMapperServiceReleaseWrapperName] =
+ ExecutorAddr::fromPtr(&releaseWrapper);
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+ExecutorSharedMemoryMapperService::reserveWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSExecutorSharedMemoryMapperServiceReserveSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &ExecutorSharedMemoryMapperService::reserve))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+ExecutorSharedMemoryMapperService::initializeWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSExecutorSharedMemoryMapperServiceInitializeSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &ExecutorSharedMemoryMapperService::initialize))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+ExecutorSharedMemoryMapperService::deinitializeWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSExecutorSharedMemoryMapperServiceDeinitializeSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &ExecutorSharedMemoryMapperService::deinitialize))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+ExecutorSharedMemoryMapperService::releaseWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSExecutorSharedMemoryMapperServiceReleaseSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &ExecutorSharedMemoryMapperService::release))
+ .release();
+}
+
+} // namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
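Executor-side sketch (not part of the change above): an in-process round trip through the new service, as a unit test might do it; in a real session these entry points are reached through the SPS wrappers registered by addBootstrapSymbols():

    #include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h"

    using namespace llvm;
    using namespace llvm::orc;

    // Reserve a block of shared memory and release it again. The returned pair
    // carries the executor address of the mapping and the shared memory name
    // that a controller-side SharedMemoryMapper would open and map locally.
    static Error reserveAndRelease(
        rt_bootstrap::ExecutorSharedMemoryMapperService &Service,
        uint64_t Size) {
      auto Reservation = Service.reserve(Size);
      if (!Reservation)
        return Reservation.takeError();
      return Service.release({Reservation->first});
    }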
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
index 7cadf3bb51a7..c848dd65fa7e 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -126,7 +126,8 @@ Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
inconvertibleErrorCode()));
char *Mem = Seg.Addr.toPtr<char *>();
- memcpy(Mem, Seg.Content.data(), Seg.Content.size());
+ if (!Seg.Content.empty())
+ memcpy(Mem, Seg.Content.data(), Seg.Content.size());
memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
assert(Seg.Size <= std::numeric_limits<size_t>::max());
if (auto EC = sys::Memory::protectMappedMemory(
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 574d9174bebf..cee4cddab5e8 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1453,7 +1453,36 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
BodyGenCB(TaskAllocaIP, TaskBodyIP);
- Builder.SetInsertPoint(TaskExitBB);
+ Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());
+
+ return Builder.saveIP();
+}
+
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
+ InsertPointTy AllocaIP,
+ BodyGenCallbackTy BodyGenCB) {
+ if (!updateToLocation(Loc))
+ return InsertPointTy();
+
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ Value *ThreadID = getOrCreateThreadID(Ident);
+
+ // Emit the @__kmpc_taskgroup runtime call to start the taskgroup
+ Function *TaskgroupFn =
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
+ Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
+
+ BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit");
+ BodyGenCB(AllocaIP, Builder.saveIP());
+
+ Builder.SetInsertPoint(TaskgroupExitBB);
+ // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
+ Function *EndTaskgroupFn =
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
+ Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
return Builder.saveIP();
}
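Hypothetical front-end use of the new createTaskgroup entry point (not part of the change above); Builder, OMPBuilder, AllocaIP and DL stand in for state the surrounding code generator already has, and only the createTaskgroup call and the body-callback shape are taken from the patch:

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // Emit a taskgroup region: the callback body is generated between the
    // __kmpc_taskgroup and __kmpc_end_taskgroup calls emitted by the builder.
    static void emitTaskgroup(IRBuilder<> &Builder, OpenMPIRBuilder &OMPBuilder,
                              OpenMPIRBuilder::InsertPointTy AllocaIP,
                              DebugLoc DL) {
      OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
      Builder.restoreIP(OMPBuilder.createTaskgroup(
          Loc, AllocaIP,
          [&](OpenMPIRBuilder::InsertPointTy /*AllocaIP*/,
              OpenMPIRBuilder::InsertPointTy CodeGenIP) {
            Builder.restoreIP(CodeGenIP);
            // Emit the taskgroup region body here.
          }));
    }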
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 53df94366760..d4138133721e 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -354,6 +354,8 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty,
B.addAttribute("frame-pointer", "all");
break;
}
+ if (M->getModuleFlag("function_return_thunk_extern"))
+ B.addAttribute(Attribute::FnRetThunkExtern);
F->addFnAttrs(B);
return F;
}
diff --git a/llvm/lib/IR/GCStrategy.cpp b/llvm/lib/IR/GCStrategy.cpp
index f3bc5b74f8fd..5833dc26c57e 100644
--- a/llvm/lib/IR/GCStrategy.cpp
+++ b/llvm/lib/IR/GCStrategy.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/GCStrategy.h"
+#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -32,7 +33,7 @@ std::unique_ptr<GCStrategy> llvm::getGCStrategy(const StringRef Name) {
const std::string error =
std::string("unsupported GC: ") + Name.str() +
" (did you remember to link and initialize the library?)";
- report_fatal_error(error);
+ report_fatal_error(Twine(error));
} else
- report_fatal_error(std::string("unsupported GC: ") + Name.str());
+ report_fatal_error(Twine(std::string("unsupported GC: ") + Name.str()));
}
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 3265050261c8..51a22897babd 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -316,32 +316,38 @@ bool GlobalObject::canIncreaseAlignment() const {
return true;
}
+template <typename Operation>
static const GlobalObject *
-findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) {
- if (auto *GO = dyn_cast<GlobalObject>(C))
+findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases,
+ const Operation &Op) {
+ if (auto *GO = dyn_cast<GlobalObject>(C)) {
+ Op(*GO);
return GO;
- if (auto *GA = dyn_cast<GlobalAlias>(C))
+ }
+ if (auto *GA = dyn_cast<GlobalAlias>(C)) {
+ Op(*GA);
if (Aliases.insert(GA).second)
- return findBaseObject(GA->getOperand(0), Aliases);
+ return findBaseObject(GA->getOperand(0), Aliases, Op);
+ }
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
switch (CE->getOpcode()) {
case Instruction::Add: {
- auto *LHS = findBaseObject(CE->getOperand(0), Aliases);
- auto *RHS = findBaseObject(CE->getOperand(1), Aliases);
+ auto *LHS = findBaseObject(CE->getOperand(0), Aliases, Op);
+ auto *RHS = findBaseObject(CE->getOperand(1), Aliases, Op);
if (LHS && RHS)
return nullptr;
return LHS ? LHS : RHS;
}
case Instruction::Sub: {
- if (findBaseObject(CE->getOperand(1), Aliases))
+ if (findBaseObject(CE->getOperand(1), Aliases, Op))
return nullptr;
- return findBaseObject(CE->getOperand(0), Aliases);
+ return findBaseObject(CE->getOperand(0), Aliases, Op);
}
case Instruction::IntToPtr:
case Instruction::PtrToInt:
case Instruction::BitCast:
case Instruction::GetElementPtr:
- return findBaseObject(CE->getOperand(0), Aliases);
+ return findBaseObject(CE->getOperand(0), Aliases, Op);
default:
break;
}
@@ -351,7 +357,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) {
const GlobalObject *GlobalValue::getAliaseeObject() const {
DenseSet<const GlobalAlias *> Aliases;
- return findBaseObject(this, Aliases);
+ return findBaseObject(this, Aliases, [](const GlobalValue &) {});
}
bool GlobalValue::isAbsoluteSymbolRef() const {
@@ -544,7 +550,7 @@ void GlobalAlias::setAliasee(Constant *Aliasee) {
const GlobalObject *GlobalAlias::getAliaseeObject() const {
DenseSet<const GlobalAlias *> Aliases;
- return findBaseObject(getOperand(0), Aliases);
+ return findBaseObject(getOperand(0), Aliases, [](const GlobalValue &) {});
}
//===----------------------------------------------------------------------===//
@@ -577,5 +583,12 @@ void GlobalIFunc::eraseFromParent() {
const Function *GlobalIFunc::getResolverFunction() const {
DenseSet<const GlobalAlias *> Aliases;
- return dyn_cast<Function>(findBaseObject(getResolver(), Aliases));
+ return dyn_cast<Function>(
+ findBaseObject(getResolver(), Aliases, [](const GlobalValue &) {}));
+}
+
+void GlobalIFunc::applyAlongResolverPath(
+ function_ref<void(const GlobalValue &)> Op) const {
+ DenseSet<const GlobalAlias *> Aliases;
+ findBaseObject(getResolver(), Aliases, Op);
}
diff --git a/llvm/lib/IR/InlineAsm.cpp b/llvm/lib/IR/InlineAsm.cpp
index c75b1aa7c1d6..088fcfdec742 100644
--- a/llvm/lib/IR/InlineAsm.cpp
+++ b/llvm/lib/IR/InlineAsm.cpp
@@ -93,6 +93,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
} else if (*I == '=') {
++I;
Type = isOutput;
+ } else if (*I == '!') {
+ ++I;
+ Type = isLabel;
}
if (*I == '*') {
@@ -265,14 +268,14 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) {
return makeStringError("failed to parse constraints");
unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
- unsigned NumIndirect = 0;
+ unsigned NumIndirect = 0, NumLabels = 0;
for (const ConstraintInfo &Constraint : Constraints) {
switch (Constraint.Type) {
case InlineAsm::isOutput:
- if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
- return makeStringError("output constraint occurs after input "
- "or clobber constraint");
+ if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0 || NumLabels != 0)
+ return makeStringError("output constraint occurs after input, "
+ "clobber or label constraint");
if (!Constraint.isIndirect) {
++NumOutputs;
@@ -289,6 +292,13 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) {
case InlineAsm::isClobber:
++NumClobbers;
break;
+ case InlineAsm::isLabel:
+ if (NumClobbers)
+ return makeStringError("label constraint occurs after clobber "
+ "constraint");
+
+ ++NumLabels;
+ break;
}
}
@@ -312,5 +322,7 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) {
if (Ty->getNumParams() != NumInputs)
return makeStringError("number of input constraints does not match number "
"of parameters");
+
+  // We don't have access to labels here; NumLabels will be checked separately.
return Error::success();
}
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index b333f40f3ce9..26171f537244 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -960,15 +960,10 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough,
setName(NameStr);
}
-void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) {
- assert(getNumIndirectDests() > i && "IndirectDest # out of range for callbr");
- if (BasicBlock *OldBB = getIndirectDest(i)) {
- BlockAddress *Old = BlockAddress::get(OldBB);
- BlockAddress *New = BlockAddress::get(B);
- for (unsigned ArgNo = 0, e = arg_size(); ArgNo != e; ++ArgNo)
- if (dyn_cast<BlockAddress>(getArgOperand(ArgNo)) == Old)
- setArgOperand(ArgNo, New);
- }
+BlockAddress *
+CallBrInst::getBlockAddressForIndirectDest(unsigned DestNo) const {
+ return BlockAddress::get(const_cast<Function *>(getFunction()),
+ getIndirectDest(DestNo));
}
CallBrInst::CallBrInst(const CallBrInst &CBI)
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 65a9a32ad2c5..c50d6901c9da 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -694,8 +694,10 @@ unsigned BinaryOpIntrinsic::getNoWrapKind() const {
return OverflowingBinaryOperator::NoUnsignedWrap;
}
-const GCStatepointInst *GCProjectionInst::getStatepoint() const {
+const Value *GCProjectionInst::getStatepoint() const {
const Value *Token = getArgOperand(0);
+ if (isa<UndefValue>(Token))
+ return Token;
// This takes care both of relocates for call statepoints and relocates
// on normal path of invoke statepoint.
@@ -714,13 +716,23 @@ const GCStatepointInst *GCProjectionInst::getStatepoint() const {
}
Value *GCRelocateInst::getBasePtr() const {
- if (auto Opt = getStatepoint()->getOperandBundle(LLVMContext::OB_gc_live))
+ auto Statepoint = getStatepoint();
+ if (isa<UndefValue>(Statepoint))
+ return UndefValue::get(Statepoint->getType());
+
+ auto *GCInst = cast<GCStatepointInst>(Statepoint);
+ if (auto Opt = GCInst->getOperandBundle(LLVMContext::OB_gc_live))
return *(Opt->Inputs.begin() + getBasePtrIndex());
- return *(getStatepoint()->arg_begin() + getBasePtrIndex());
+ return *(GCInst->arg_begin() + getBasePtrIndex());
}
Value *GCRelocateInst::getDerivedPtr() const {
- if (auto Opt = getStatepoint()->getOperandBundle(LLVMContext::OB_gc_live))
+ auto *Statepoint = getStatepoint();
+ if (isa<UndefValue>(Statepoint))
+ return UndefValue::get(Statepoint->getType());
+
+ auto *GCInst = cast<GCStatepointInst>(Statepoint);
+ if (auto Opt = GCInst->getOperandBundle(LLVMContext::OB_gc_live))
return *(Opt->Inputs.begin() + getDerivedPtrIndex());
- return *(getStatepoint()->arg_begin() + getDerivedPtrIndex());
+ return *(GCInst->arg_begin() + getDerivedPtrIndex());
}
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 75d02f4c8c82..e3ea256af16d 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -469,6 +469,9 @@ private:
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
void visitProfMetadata(Instruction &I, MDNode *MD);
+ void visitCallStackMetadata(MDNode *MD);
+ void visitMemProfMetadata(Instruction &I, MDNode *MD);
+ void visitCallsiteMetadata(Instruction &I, MDNode *MD);
void visitAnnotationMetadata(MDNode *Annotation);
void visitAliasScopeMetadata(const MDNode *MD);
void visitAliasScopeListMetadata(const MDNode *MD);
@@ -1624,8 +1627,10 @@ Verifier::visitModuleFlag(const MDNode *Op,
break;
case Module::Min: {
- Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)),
- "invalid value for 'min' module flag (expected constant integer)",
+ auto *V = mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2));
+ Check(V && V->getValue().isNonNegative(),
+ "invalid value for 'min' module flag (expected constant non-negative "
+ "integer)",
Op->getOperand(2));
break;
}
@@ -2200,7 +2205,13 @@ bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
void Verifier::verifyInlineAsmCall(const CallBase &Call) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
unsigned ArgNo = 0;
+ unsigned LabelNo = 0;
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+ if (CI.Type == InlineAsm::isLabel) {
+ ++LabelNo;
+ continue;
+ }
+
// Only deal with constraints that correspond to call arguments.
if (!CI.hasArg())
continue;
@@ -2222,6 +2233,15 @@ void Verifier::verifyInlineAsmCall(const CallBase &Call) {
ArgNo++;
}
+
+ if (auto *CallBr = dyn_cast<CallBrInst>(&Call)) {
+ Check(LabelNo == CallBr->getNumIndirectDests(),
+ "Number of label constraints does not match number of callbr dests",
+ &Call);
+ } else {
+ Check(LabelNo == 0, "Label constraints can only be used with callbr",
+ &Call);
+ }
}
/// Verify that statepoint intrinsic is well formed.
@@ -2839,25 +2859,6 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
- for (unsigned i = 0, e = CBI.getNumSuccessors(); i != e; ++i)
- Check(CBI.getSuccessor(i)->getType()->isLabelTy(),
- "Callbr successors must all have pointer type!", &CBI);
- for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) {
- Check(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)),
- "Using an unescaped label as a callbr argument!", &CBI);
- if (isa<BasicBlock>(CBI.getOperand(i)))
- for (unsigned j = i + 1; j != e; ++j)
- Check(CBI.getOperand(i) != CBI.getOperand(j),
- "Duplicate callbr destination!", &CBI);
- }
- {
- SmallPtrSet<BasicBlock *, 4> ArgBBs;
- for (Value *V : CBI.args())
- if (auto *BA = dyn_cast<BlockAddress>(V))
- ArgBBs.insert(BA->getBasicBlock());
- for (BasicBlock *BB : CBI.getIndirectDests())
- Check(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI);
- }
verifyInlineAsmCall(CBI);
visitTerminator(CBI);
@@ -4489,6 +4490,55 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) {
}
}
+void Verifier::visitCallStackMetadata(MDNode *MD) {
+ // Call stack metadata should consist of a list of at least 1 constant int
+ // (representing a hash of the location).
+ Check(MD->getNumOperands() >= 1,
+ "call stack metadata should have at least 1 operand", MD);
+
+ for (const auto &Op : MD->operands())
+ Check(mdconst::dyn_extract_or_null<ConstantInt>(Op),
+ "call stack metadata operand should be constant integer", Op);
+}
+
+void Verifier::visitMemProfMetadata(Instruction &I, MDNode *MD) {
+ Check(isa<CallBase>(I), "!memprof metadata should only exist on calls", &I);
+ Check(MD->getNumOperands() >= 1,
+ "!memprof annotations should have at least 1 metadata operand "
+ "(MemInfoBlock)",
+ MD);
+
+ // Check each MIB
+ for (auto &MIBOp : MD->operands()) {
+ MDNode *MIB = dyn_cast<MDNode>(MIBOp);
+ // The first operand of an MIB should be the call stack metadata.
+    // The rest of the operands should be MDString tags, and there should be
+ // at least one.
+ Check(MIB->getNumOperands() >= 2,
+ "Each !memprof MemInfoBlock should have at least 2 operands", MIB);
+
+ // Check call stack metadata (first operand).
+ Check(MIB->getOperand(0) != nullptr,
+ "!memprof MemInfoBlock first operand should not be null", MIB);
+ Check(isa<MDNode>(MIB->getOperand(0)),
+ "!memprof MemInfoBlock first operand should be an MDNode", MIB);
+ MDNode *StackMD = dyn_cast<MDNode>(MIB->getOperand(0));
+ visitCallStackMetadata(StackMD);
+
+ // Check that remaining operands are MDString.
+ Check(std::all_of(MIB->op_begin() + 1, MIB->op_end(),
+ [](const MDOperand &Op) { return isa<MDString>(Op); }),
+ "Not all !memprof MemInfoBlock operands 1 to N are MDString", MIB);
+ }
+}
+
+void Verifier::visitCallsiteMetadata(Instruction &I, MDNode *MD) {
+ Check(isa<CallBase>(I), "!callsite metadata should only exist on calls", &I);
+ // Verify the partial callstack annotated from memprof profiles. This callsite
+ // is a part of a profiled allocation callstack.
+ visitCallStackMetadata(MD);
+}
+
void Verifier::visitAnnotationMetadata(MDNode *Annotation) {
Check(isa<MDTuple>(Annotation), "annotation must be a tuple");
Check(Annotation->getNumOperands() >= 1,
@@ -4735,6 +4785,12 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof))
visitProfMetadata(I, MD);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_memprof))
+ visitMemProfMetadata(I, MD);
+
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite))
+ visitCallsiteMetadata(I, MD);
+
if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation))
visitAnnotationMetadata(Annotation);
@@ -5160,14 +5216,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
// In all other cases relocate should be tied to the statepoint directly.
// This covers relocates on a normal return path of invoke statepoint and
// relocates of a call statepoint.
- auto Token = Call.getArgOperand(0);
- Check(isa<GCStatepointInst>(Token),
+ auto *Token = Call.getArgOperand(0);
+ Check(isa<GCStatepointInst>(Token) || isa<UndefValue>(Token),
"gc relocate is incorrectly tied to the statepoint", Call, Token);
}
// Verify rest of the relocate arguments.
- const CallBase &StatepointCall =
- *cast<GCRelocateInst>(Call).getStatepoint();
+ const Value &StatepointCall = *cast<GCRelocateInst>(Call).getStatepoint();
// Both the base and derived must be piped through the safepoint.
Value *Base = Call.getArgOperand(1);
@@ -5182,7 +5237,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
const uint64_t DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
// Check the bounds
- if (auto Opt = StatepointCall.getOperandBundle(LLVMContext::OB_gc_live)) {
+ if (isa<UndefValue>(StatepointCall))
+ break;
+ if (auto Opt = cast<GCStatepointInst>(StatepointCall)
+ .getOperandBundle(LLVMContext::OB_gc_live)) {
Check(BaseIndex < Opt->Inputs.size(),
"gc.relocate: statepoint base index out of bounds", Call);
Check(DerivedIndex < Opt->Inputs.size(),
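For reference (not part of the change above), metadata of the shape the new !memprof and !callsite checks accept, built by hand; the frame ids and the "notcold" tag are placeholders, since the real annotations are produced by the memprof profile matcher:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Attach a !callsite call stack (constant-int frame ids) and one !memprof
    // MemInfoBlock (the same call stack followed by an MDString tag) to a call.
    static void annotateCall(CallBase &CB, ArrayRef<uint64_t> StackIds) {
      LLVMContext &Ctx = CB.getContext();
      Type *I64 = Type::getInt64Ty(Ctx);
      SmallVector<Metadata *> Frames;
      for (uint64_t Id : StackIds)
        Frames.push_back(ConstantAsMetadata::get(ConstantInt::get(I64, Id)));
      MDNode *Stack = MDNode::get(Ctx, Frames);
      CB.setMetadata(LLVMContext::MD_callsite, Stack);

      Metadata *MIBOps[] = {Stack, MDString::get(Ctx, "notcold")};
      Metadata *MemProfOps[] = {MDNode::get(Ctx, MIBOps)};
      CB.setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MemProfOps));
    }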
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 9e89cce8312e..e31faf6422ed 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -1273,14 +1273,19 @@ Error IRLinker::linkModuleFlagsMetadata() {
// First build a map of the existing module flags and requirements.
DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags;
SmallSetVector<MDNode *, 16> Requirements;
+ SmallVector<unsigned, 0> Mins;
+ DenseSet<MDString *> SeenMin;
for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
MDNode *Op = DstModFlags->getOperand(I);
- ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0));
+ uint64_t Behavior =
+ mdconst::extract<ConstantInt>(Op->getOperand(0))->getZExtValue();
MDString *ID = cast<MDString>(Op->getOperand(1));
- if (Behavior->getZExtValue() == Module::Require) {
+ if (Behavior == Module::Require) {
Requirements.insert(cast<MDNode>(Op->getOperand(2)));
} else {
+ if (Behavior == Module::Min)
+ Mins.push_back(I);
Flags[ID] = std::make_pair(Op, I);
}
}
@@ -1296,6 +1301,7 @@ Error IRLinker::linkModuleFlagsMetadata() {
unsigned DstIndex;
std::tie(DstOp, DstIndex) = Flags.lookup(ID);
unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();
+ SeenMin.insert(ID);
// If this is a requirement, add it and continue.
if (SrcBehaviorValue == Module::Require) {
@@ -1309,6 +1315,10 @@ Error IRLinker::linkModuleFlagsMetadata() {
// If there is no existing flag with this ID, just add it.
if (!DstOp) {
+ if (SrcBehaviorValue == Module::Min) {
+ Mins.push_back(DstModFlags->getNumOperands());
+ SeenMin.erase(ID);
+ }
Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands());
DstModFlags->addOperand(SrcOp);
continue;
@@ -1362,8 +1372,10 @@ Error IRLinker::linkModuleFlagsMetadata() {
"Expected MDTuple when appending module flags");
if (DstValue->isDistinct())
return dyn_cast<MDTuple>(DstValue);
+ ArrayRef<MDOperand> DstOperands = DstValue->operands();
MDTuple *New = MDTuple::getDistinct(
- DstM.getContext(), SmallVector<Metadata *, 4>(DstValue->operands()));
+ DstM.getContext(),
+ SmallVector<Metadata *, 4>(DstOperands.begin(), DstOperands.end()));
Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
MDNode *Flag = MDTuple::getDistinct(DstM.getContext(), FlagOps);
DstModFlags->setOperand(DstIndex, Flag);
@@ -1465,6 +1477,20 @@ Error IRLinker::linkModuleFlagsMetadata() {
}
+ // For the Min behavior, set the value to 0 if either module does not have the
+ // flag.
+ for (auto Idx : Mins) {
+ MDNode *Op = DstModFlags->getOperand(Idx);
+ MDString *ID = cast<MDString>(Op->getOperand(1));
+ if (!SeenMin.count(ID)) {
+ ConstantInt *V = mdconst::extract<ConstantInt>(Op->getOperand(2));
+ Metadata *FlagOps[] = {
+ Op->getOperand(0), ID,
+ ConstantAsMetadata::get(ConstantInt::get(V->getType(), 0))};
+ DstModFlags->setOperand(Idx, MDNode::get(DstM.getContext(), FlagOps));
+ }
+ }
+
// Check all of the requirements.
for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
MDNode *Requirement = Requirements[I];
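For reference (not part of the change above), how a producer sets a flag with the Min behavior merged here; "example-min-flag" is a made-up name. When two modules are linked the smaller value wins, and a module that lacks the flag now contributes zero:

    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Record a non-negative integer flag with 'min' merge semantics; the
    // Verifier change in this patch rejects negative values for such flags.
    static void addMinFlag(Module &M, uint32_t Value) {
      M.addModuleFlag(Module::Min, "example-min-flag", Value);
    }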
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 78204ffe4c3b..0b4e9866d50a 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -144,9 +144,9 @@ struct ELFWriter {
uint64_t align(unsigned Alignment);
- bool maybeWriteCompression(uint64_t Size,
+ bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
SmallVectorImpl<uint8_t> &CompressedContents,
- bool ZLibStyle, unsigned Alignment);
+ unsigned Alignment);
public:
ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
@@ -819,36 +819,25 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
// Include the debug info compression header.
bool ELFWriter::maybeWriteCompression(
- uint64_t Size, SmallVectorImpl<uint8_t> &CompressedContents, bool ZLibStyle,
- unsigned Alignment) {
- if (ZLibStyle) {
- uint64_t HdrSize =
- is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr);
- if (Size <= HdrSize + CompressedContents.size())
- return false;
- // Platform specific header is followed by compressed data.
- if (is64Bit()) {
- // Write Elf64_Chdr header.
- write(static_cast<ELF::Elf64_Word>(ELF::ELFCOMPRESS_ZLIB));
- write(static_cast<ELF::Elf64_Word>(0)); // ch_reserved field.
- write(static_cast<ELF::Elf64_Xword>(Size));
- write(static_cast<ELF::Elf64_Xword>(Alignment));
- } else {
- // Write Elf32_Chdr header otherwise.
- write(static_cast<ELF::Elf32_Word>(ELF::ELFCOMPRESS_ZLIB));
- write(static_cast<ELF::Elf32_Word>(Size));
- write(static_cast<ELF::Elf32_Word>(Alignment));
- }
- return true;
- }
-
- // "ZLIB" followed by 8 bytes representing the uncompressed size of the section,
- // useful for consumers to preallocate a buffer to decompress into.
- const StringRef Magic = "ZLIB";
- if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size())
+ uint32_t ChType, uint64_t Size,
+ SmallVectorImpl<uint8_t> &CompressedContents, unsigned Alignment) {
+ uint64_t HdrSize =
+ is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr);
+ if (Size <= HdrSize + CompressedContents.size())
return false;
- W.OS << Magic;
- support::endian::write(W.OS, Size, support::big);
+ // Platform specific header is followed by compressed data.
+ if (is64Bit()) {
+ // Write Elf64_Chdr header.
+ write(static_cast<ELF::Elf64_Word>(ChType));
+ write(static_cast<ELF::Elf64_Word>(0)); // ch_reserved field.
+ write(static_cast<ELF::Elf64_Xword>(Size));
+ write(static_cast<ELF::Elf64_Xword>(Alignment));
+ } else {
+ // Write Elf32_Chdr header otherwise.
+ write(static_cast<ELF::Elf32_Word>(ChType));
+ write(static_cast<ELF::Elf32_Word>(Size));
+ write(static_cast<ELF::Elf32_Word>(Alignment));
+ }
return true;
}
@@ -867,38 +856,31 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
return;
}
- assert((MAI->compressDebugSections() == DebugCompressionType::Z ||
- MAI->compressDebugSections() == DebugCompressionType::GNU) &&
- "expected zlib or zlib-gnu style compression");
+ assert(MAI->compressDebugSections() == DebugCompressionType::Z &&
+ "expected zlib style compression");
SmallVector<char, 128> UncompressedData;
raw_svector_ostream VecOS(UncompressedData);
Asm.writeSectionData(VecOS, &Section, Layout);
- SmallVector<uint8_t, 128> CompressedContents;
+ SmallVector<uint8_t, 128> Compressed;
+ const uint32_t ChType = ELF::ELFCOMPRESS_ZLIB;
compression::zlib::compress(
makeArrayRef(reinterpret_cast<uint8_t *>(UncompressedData.data()),
UncompressedData.size()),
- CompressedContents);
+ Compressed);
- bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z;
- if (!maybeWriteCompression(UncompressedData.size(), CompressedContents,
- ZlibStyle, Sec.getAlignment())) {
+ if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
+ Sec.getAlignment())) {
W.OS << UncompressedData;
return;
}
- if (ZlibStyle) {
- // Set the compressed flag. That is zlib style.
- Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED);
- // Alignment field should reflect the requirements of
- // the compressed section header.
- Section.setAlignment(is64Bit() ? Align(8) : Align(4));
- } else {
- // Add "z" prefix to section name. This is zlib-gnu style.
- MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str());
- }
- W.OS << toStringRef(CompressedContents);
+ Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED);
+ // Alignment field should reflect the requirements of
+ // the compressed section header.
+ Section.setAlignment(is64Bit() ? Align(8) : Align(4));
+ W.OS << toStringRef(Compressed);
}
void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index d312e3521c9e..322ed8e23eb6 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -468,24 +468,6 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section,
return R.first->second;
}
-void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) {
- StringRef GroupName;
- if (const MCSymbol *Group = Section->getGroup())
- GroupName = Group->getName();
-
- // This function is only used by .debug*, which should not have the
- // SHF_LINK_ORDER flag.
- unsigned UniqueID = Section->getUniqueID();
- ELFUniquingMap.erase(
- ELFSectionKey{Section->getName(), GroupName, "", UniqueID});
- auto I = ELFUniquingMap
- .insert(std::make_pair(
- ELFSectionKey{Name, GroupName, "", UniqueID}, Section))
- .first;
- StringRef CachedName = I->first.SectionName;
- const_cast<MCSectionELF *>(Section)->setSectionName(CachedName);
-}
-
MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type,
unsigned Flags, SectionKind K,
unsigned EntrySize,
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index 9f22b9b0a866..f358f593ff39 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -583,15 +583,27 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context,
return S;
}
-// Create the AddrSig section and first data fragment here as its layout needs
-// to be computed immediately after in order for it to be exported correctly.
+// The AddrSig section uses a series of relocations to refer to the symbols that
+// should be considered address-significant. The only interesting content of
+// these relocations is their symbol; the type, length etc will be ignored by
+// the linker. The reason we are not referring to the symbol indices directly is
+// that those indices will be invalidated by tools that update the symbol table.
+// Symbol relocations, in contrast, are updated by tools such as llvm-strip.
void MCMachOStreamer::createAddrSigSection() {
MCAssembler &Asm = getAssembler();
MCObjectWriter &writer = Asm.getWriter();
if (!writer.getEmitAddrsigSection())
return;
+ // Create the AddrSig section and first data fragment here as its layout needs
+ // to be computed immediately after in order for it to be exported correctly.
MCSection *AddrSigSection =
Asm.getContext().getObjectFileInfo()->getAddrSigSection();
Asm.registerSection(*AddrSigSection);
- new MCDataFragment(AddrSigSection);
+ auto *Frag = new MCDataFragment(AddrSigSection);
+ // We will generate a series of pointer-sized symbol relocations at offset
+ // 0x0. Set the section size to be large enough to contain a single pointer
+ // (instead of emitting a zero-sized section) so these relocations are
+ // technically valid, even though we don't expect these relocations to
+ // actually be applied by the linker.
+ Frag->getContents().resize(8);
}
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 694ea395fdec..af80d8327210 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -4238,11 +4238,8 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure,
}
}
// Default-initialize all remaining fields.
- for (auto It = Structure.Fields.begin() + FieldIndex;
- It != Structure.Fields.end(); ++It) {
- const FieldInfo &Field = *It;
+ for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
FieldInitializers.push_back(Field.Contents);
- }
if (EndToken) {
if (EndToken.value() == AsmToken::Greater)
@@ -4350,9 +4347,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
return true;
}
// Default-initialize all remaining values.
- for (auto it = Contents.Values.begin() + Initializer.Values.size();
- it != Contents.Values.end(); ++it) {
- const auto &Value = *it;
+ for (const auto &Value :
+ llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
if (emitIntValue(Value, Field.Type))
return true;
}
@@ -4367,9 +4363,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
AsInt.getBitWidth() / 8);
}
// Default-initialize all remaining values.
- for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size();
- It != Contents.AsIntValues.end(); ++It) {
- const auto &AsInt = *It;
+ for (const auto &AsInt :
+ llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
getStreamer().emitIntValue(AsInt.getLimitedValue(),
AsInt.getBitWidth() / 8);
}
@@ -4384,10 +4379,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
return true;
}
// Default-initialize all remaining values.
- for (auto It =
- Contents.Initializers.begin() + Initializer.Initializers.size();
- It != Contents.Initializers.end(); ++It) {
- const auto &Init = *It;
+ for (const auto &Init : llvm::drop_begin(Contents.Initializers,
+ Initializer.Initializers.size())) {
if (emitStructInitializer(Contents.Structure, Init))
return true;
}
@@ -4425,10 +4418,8 @@ bool MasmParser::emitStructInitializer(const StructInfo &Structure,
return true;
}
// Default-initialize all remaining fields.
- for (auto It =
- Structure.Fields.begin() + Initializer.FieldInitializers.size();
- It != Structure.Fields.end(); ++It) {
- const auto &Field = *It;
+ for (const auto &Field : llvm::drop_begin(
+ Structure.Fields, Initializer.FieldInitializers.size())) {
getStreamer().emitZeros(Field.Offset - Offset);
Offset = Field.Offset + Field.SizeOf;
if (emitFieldValue(Field))
@@ -4649,10 +4640,8 @@ bool MasmParser::parseDirectiveNestedEnds() {
if (ParentStruct.IsUnion) {
ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
} else {
- for (auto FieldIter = ParentStruct.Fields.begin() + OldFields;
- FieldIter != ParentStruct.Fields.end(); ++FieldIter) {
- FieldIter->Offset += FirstFieldOffset;
- }
+ for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
+ Field.Offset += FirstFieldOffset;
const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
if (!ParentStruct.IsUnion) {
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 5277ce87bee0..fdf8bbbe0a4d 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -519,7 +520,7 @@ void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
std::vector<uint64_t> Addresses;
for (auto Entry : Address2ProbesMap)
Addresses.push_back(Entry.first);
- std::sort(Addresses.begin(), Addresses.end());
+ llvm::sort(Addresses);
for (auto K : Addresses) {
OS << "Address:\t";
OS << K;
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 78d0d9cec556..038433cb24fa 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -753,32 +753,27 @@ static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
llvm_unreachable("Invalid mc version min type");
}
-// Encode addrsig data as symbol indexes in variable length encoding.
-void MachObjectWriter::writeAddrsigSection(MCAssembler &Asm) {
+void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) {
MCSection *AddrSigSection =
Asm.getContext().getObjectFileInfo()->getAddrSigSection();
- MCSection::FragmentListType &fragmentList = AddrSigSection->getFragmentList();
- if (!fragmentList.size())
- return;
-
- assert(fragmentList.size() == 1);
- MCFragment *pFragment = &*fragmentList.begin();
- MCDataFragment *pDataFragment = dyn_cast_or_null<MCDataFragment>(pFragment);
- assert(pDataFragment);
-
- raw_svector_ostream OS(pDataFragment->getContents());
- for (const MCSymbol *sym : this->getAddrsigSyms())
- encodeULEB128(sym->getIndex(), OS);
+ unsigned Log2Size = is64Bit() ? 3 : 2;
+ for (const MCSymbol *S : getAddrsigSyms()) {
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = 0;
+ MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28);
+ addRelocation(S, AddrSigSection, MRE);
+ }
}
uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint64_t StartOffset = W.OS.tell();
+ populateAddrSigSection(Asm);
+
// Compute symbol table information and bind symbol indices.
computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);
- writeAddrsigSection(Asm);
if (!Asm.CGProfile.empty()) {
MCSection *CGProfileSection = Asm.getContext().getMachOSection(
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 33e496b7a864..809ac37c3442 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -169,6 +169,7 @@ public:
Strings.clear();
SectionMap.clear();
SymbolMap.clear();
+ WeakDefaults.clear();
MCObjectWriter::reset();
}
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index 977e77bf67fd..d46ae2247535 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -206,6 +206,7 @@ class XCOFFObjectWriter : public MCObjectWriter {
uint16_t SectionCount = 0;
uint64_t RelocationEntryOffset = 0;
std::vector<std::pair<std::string, size_t>> FileNames;
+ bool HasVisibility = false;
support::endian::Writer W;
std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter;
@@ -275,6 +276,7 @@ class XCOFFObjectWriter : public MCObjectWriter {
void writeSymbolEntryForDwarfSection(const XCOFFSection &DwarfSectionRef,
int16_t SectionIndex);
void writeFileHeader();
+ void writeAuxFileHeader();
void writeSectionHeaderTable();
void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout);
void writeSectionForControlSectionEntry(const MCAssembler &Asm,
@@ -308,14 +310,9 @@ class XCOFFObjectWriter : public MCObjectWriter {
void assignAddressesAndIndices(const MCAsmLayout &);
void finalizeSectionInfo();
- // TODO aux header support not implemented.
- bool needsAuxiliaryHeader() const { return false; }
-
- // Returns the size of the auxiliary header to be written to the object file.
size_t auxiliaryHeaderSize() const {
- assert(!needsAuxiliaryHeader() &&
- "Auxiliary header support not implemented.");
- return 0;
+ // 64-bit object files have no auxiliary header.
+ return HasVisibility && !is64Bit() ? XCOFF::AuxFileHeaderSizeShort : 0;
}
public:
@@ -468,6 +465,9 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(&S);
const MCSectionXCOFF *ContainingCsect = getContainingCsect(XSym);
+ if (XSym->getVisibilityType() != XCOFF::SYM_V_UNSPECIFIED)
+ HasVisibility = true;
+
if (ContainingCsect->getCSectType() == XCOFF::XTY_ER) {
// Handle undefined symbol.
UndefinedCsects.emplace_back(ContainingCsect);
@@ -648,6 +648,7 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
uint64_t StartOffset = W.OS.tell();
writeFileHeader();
+ writeAuxFileHeader();
writeSectionHeaderTable();
writeSections(Asm, Layout);
writeRelocations();
@@ -688,12 +689,6 @@ void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value,
W.write<uint32_t>(Value);
}
W.write<int16_t>(SectionNumber);
- // Basic/Derived type. See the description of the n_type field for symbol
- // table entries for a detailed description. Since we don't yet support
- // visibility, and all other bits are either optionally set or reserved, this
- // is always zero.
- if (SymbolType != 0)
- report_fatal_error("Emitting non-zero visibilities is not supported yet.");
// TODO Set the function indicator (bit 10, 0x0020) for functions
// when debugging is enabled.
W.write<uint16_t>(SymbolType);
@@ -773,18 +768,32 @@ void XCOFFObjectWriter::writeFileHeader() {
W.write<int32_t>(0); // TimeStamp
writeWord(SymbolTableOffset);
if (is64Bit()) {
- W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object
- // file that is not to be loaded.
+ W.write<uint16_t>(auxiliaryHeaderSize());
W.write<uint16_t>(0); // Flags
W.write<int32_t>(SymbolTableEntryCount);
} else {
W.write<int32_t>(SymbolTableEntryCount);
- W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object
- // file that is not to be loaded.
+ W.write<uint16_t>(auxiliaryHeaderSize());
W.write<uint16_t>(0); // Flags
}
}
+void XCOFFObjectWriter::writeAuxFileHeader() {
+ if (!auxiliaryHeaderSize())
+ return;
+ W.write<uint16_t>(0); // Magic
+ W.write<uint16_t>(
+ XCOFF::NEW_XCOFF_INTERPRET); // Version. The new interpretation of the
+ // n_type field in the symbol table entry is
+ // used in XCOFF32.
+ W.write<uint32_t>(Sections[0]->Size); // TextSize
+ W.write<uint32_t>(Sections[1]->Size); // InitDataSize
+ W.write<uint32_t>(Sections[2]->Size); // BssDataSize
+ W.write<uint32_t>(0); // EntryPointAddr
+ W.write<uint32_t>(Sections[0]->Address); // TextStartAddr
+ W.write<uint32_t>(Sections[1]->Address); // DataStartAddr
+}
+
void XCOFFObjectWriter::writeSectionHeaderTable() {
auto writeSectionHeader = [&](const SectionEntry *Sec, bool IsDwarf) {
// Nothing to write for this Section.
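Note: the short auxiliary header emitted above is 2 + 2 + 6 * 4 = 28 bytes (Magic, Version, TextSize, InitDataSize, BssDataSize, EntryPointAddr, TextStartAddr, DataStartAddr), which presumably is the XCOFF::AuxFileHeaderSizeShort value that auxiliaryHeaderSize() reports for 32-bit objects carrying visibility.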
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index f0e4f91cd347..8b44c09023f1 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -523,9 +523,6 @@ Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
case DebugCompressionType::None:
std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
return Error::success();
- case DebugCompressionType::GNU:
- llvm_unreachable("unexpected zlib-gnu");
- break;
case DebugCompressionType::Z:
Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB;
break;
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index ad03f9cae9f8..1dffe007b9a9 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -965,14 +965,15 @@ StringRef Archive::Symbol::getName() const {
Expected<Archive::Child> Archive::Symbol::getMember() const {
const char *Buf = Parent->getSymbolTable().begin();
const char *Offsets = Buf;
- if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
+ if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64 ||
+ Parent->kind() == K_AIXBIG)
Offsets += sizeof(uint64_t);
else
Offsets += sizeof(uint32_t);
uint64_t Offset = 0;
if (Parent->kind() == K_GNU) {
Offset = read32be(Offsets + SymbolIndex * 4);
- } else if (Parent->kind() == K_GNU64) {
+ } else if (Parent->kind() == K_GNU64 || Parent->kind() == K_AIXBIG) {
Offset = read64be(Offsets + SymbolIndex * 8);
} else if (Parent->kind() == K_BSD) {
// The SymbolIndex is an index into the ranlib structs that start at
@@ -1105,6 +1106,8 @@ Archive::symbol_iterator Archive::symbol_begin() const {
// Skip the byte count of the string table.
buf += sizeof(uint64_t);
buf += ran_strx;
+ } else if (kind() == K_AIXBIG) {
+ buf = getStringTable().begin();
} else {
uint32_t member_count = 0;
uint32_t symbol_count = 0;
@@ -1127,7 +1130,7 @@ uint32_t Archive::getNumberOfSymbols() const {
const char *buf = getSymbolTable().begin();
if (kind() == K_GNU)
return read32be(buf);
- if (kind() == K_GNU64)
+ if (kind() == K_GNU64 || kind() == K_AIXBIG)
return read64be(buf);
if (kind() == K_BSD)
return read32le(buf) / 8;
@@ -1180,6 +1183,58 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
Err = malformedError("malformed AIX big archive: last member offset \"" +
RawOffset + "\" is not a number");
+ // Parse the global symbol table, if the archive has one.
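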
+ uint64_t GlobSymOffset = 0;
+ RawOffset = getFieldRawString(ArFixLenHdr->GlobSymOffset);
+ if (RawOffset.getAsInteger(10, GlobSymOffset))
+ // TODO: add test case.
+ Err = malformedError(
+ "malformed AIX big archive: global symbol table offset \"" + RawOffset +
+ "\" is not a number");
+
+ if (Err)
+ return;
+
+ if (GlobSymOffset > 0) {
+ uint64_t BufferSize = Data.getBufferSize();
+ uint64_t GlobalSymTblContentOffset =
+ GlobSymOffset + sizeof(BigArMemHdrType);
+ if (GlobalSymTblContentOffset > BufferSize) {
+ Err = malformedError("global symbol table header at offset 0x" +
+ Twine::utohexstr(GlobSymOffset) + " and size 0x" +
+ Twine::utohexstr(sizeof(BigArMemHdrType)) +
+ " goes past the end of file");
+ return;
+ }
+
+ const char *GlobSymTblLoc = Data.getBufferStart() + GlobSymOffset;
+ const BigArMemHdrType *GlobalSymHdr =
+ reinterpret_cast<const BigArMemHdrType *>(GlobSymTblLoc);
+ RawOffset = getFieldRawString(GlobalSymHdr->Size);
+ uint64_t Size;
+ if (RawOffset.getAsInteger(10, Size)) {
+ // TODO: add test case.
+ Err = malformedError(
+ "malformed AIX big archive: global symbol table size \"" + RawOffset +
+ "\" is not a number");
+ return;
+ }
+ if (GlobalSymTblContentOffset + Size > BufferSize) {
+ Err = malformedError("global symbol table content at offset 0x" +
+ Twine::utohexstr(GlobalSymTblContentOffset) +
+ " and size 0x" + Twine::utohexstr(Size) +
+ " goes past the end of file");
+ return;
+ }
+ SymbolTable = StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType), Size);
+ unsigned SymNum = getNumberOfSymbols();
+ unsigned SymOffsetsSize = 8 * (SymNum + 1);
+ uint64_t SymbolTableStringSize = Size - SymOffsetsSize;
+ StringTable =
+ StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType) + SymOffsetsSize,
+ SymbolTableStringSize);
+ }
+
child_iterator I = child_begin(Err, false);
if (Err)
return;
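Note: the layout parsed here is the big-archive global symbol table: the fixed member header (BigArMemHdrType) is followed by an 8-byte symbol count, then one 8-byte member offset per symbol, then the symbol-name string table. That is why SymOffsetsSize is 8 * (SymNum + 1) and why StringTable begins that many bytes into the member contents.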
diff --git a/llvm/lib/Object/Decompressor.cpp b/llvm/lib/Object/Decompressor.cpp
index a6a28a0589ac..3842ec92ccfc 100644
--- a/llvm/lib/Object/Decompressor.cpp
+++ b/llvm/lib/Object/Decompressor.cpp
@@ -23,9 +23,7 @@ Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data,
return createError("zlib is not available");
Decompressor D(Data);
- Error Err = isGnuStyle(Name) ? D.consumeCompressedGnuHeader()
- : D.consumeCompressedZLibHeader(Is64Bit, IsLE);
- if (Err)
+ if (Error Err = D.consumeCompressedZLibHeader(Is64Bit, IsLE))
return std::move(Err);
return D;
}
@@ -33,21 +31,6 @@ Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data,
Decompressor::Decompressor(StringRef Data)
: SectionData(Data), DecompressedSize(0) {}
-Error Decompressor::consumeCompressedGnuHeader() {
- if (!SectionData.startswith("ZLIB"))
- return createError("corrupted compressed section header");
-
- SectionData = SectionData.substr(4);
-
- // Consume uncompressed section size (big-endian 8 bytes).
- if (SectionData.size() < 8)
- return createError("corrupted uncompressed section size");
- DecompressedSize = read64be(SectionData.data());
- SectionData = SectionData.substr(8);
-
- return Error::success();
-}
-
Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit,
bool IsLittleEndian) {
using namespace ELF;
@@ -72,26 +55,6 @@ Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit,
return Error::success();
}
-bool Decompressor::isGnuStyle(StringRef Name) {
- return Name.startswith(".zdebug");
-}
-
-bool Decompressor::isCompressed(const object::SectionRef &Section) {
- if (Section.isCompressed())
- return true;
-
- Expected<StringRef> SecNameOrErr = Section.getName();
- if (SecNameOrErr)
- return isGnuStyle(*SecNameOrErr);
-
- consumeError(SecNameOrErr.takeError());
- return false;
-}
-
-bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) {
- return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name);
-}
-
Error Decompressor::decompress(MutableArrayRef<uint8_t> Buffer) {
size_t Size = Buffer.size();
return compression::zlib::uncompress(arrayRefFromStringRef(SectionData),
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index ce816b097691..d00359c6deef 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -204,7 +204,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr,
if (Expr.Extended) {
Ctx.Ptr = Start;
- while (1) {
+ while (true) {
uint8_t Opcode = readOpcode(Ctx);
switch (Opcode) {
case wasm::WASM_OPCODE_I32_CONST:
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 593243144f01..3b3eefcc29ca 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -172,7 +172,7 @@ static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
cl::desc("Eagerly invalidate more analyses in default pipelines"));
static cl::opt<bool> EnableNoRerunSimplificationPipeline(
- "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden,
+ "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden,
cl::desc(
"Prevent running the simplification pipeline on a function more "
"than once in the case that SCC mutations cause a function to be "
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index bad8184dffcf..baea0eb53ef9 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -81,36 +81,35 @@ cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG(
// facilities, the error message will be shown in place of the expected output.
//
enum class ChangePrinter {
- NoChangePrinter,
- PrintChangedVerbose,
- PrintChangedQuiet,
- PrintChangedDiffVerbose,
- PrintChangedDiffQuiet,
- PrintChangedColourDiffVerbose,
- PrintChangedColourDiffQuiet,
- PrintChangedDotCfgVerbose,
- PrintChangedDotCfgQuiet
+ None,
+ Verbose,
+ Quiet,
+ DiffVerbose,
+ DiffQuiet,
+ ColourDiffVerbose,
+ ColourDiffQuiet,
+ DotCfgVerbose,
+ DotCfgQuiet,
};
static cl::opt<ChangePrinter> PrintChanged(
"print-changed", cl::desc("Print changed IRs"), cl::Hidden,
- cl::ValueOptional, cl::init(ChangePrinter::NoChangePrinter),
+ cl::ValueOptional, cl::init(ChangePrinter::None),
cl::values(
- clEnumValN(ChangePrinter::PrintChangedQuiet, "quiet",
- "Run in quiet mode"),
- clEnumValN(ChangePrinter::PrintChangedDiffVerbose, "diff",
+ clEnumValN(ChangePrinter::Quiet, "quiet", "Run in quiet mode"),
+ clEnumValN(ChangePrinter::DiffVerbose, "diff",
"Display patch-like changes"),
- clEnumValN(ChangePrinter::PrintChangedDiffQuiet, "diff-quiet",
+ clEnumValN(ChangePrinter::DiffQuiet, "diff-quiet",
"Display patch-like changes in quiet mode"),
- clEnumValN(ChangePrinter::PrintChangedColourDiffVerbose, "cdiff",
+ clEnumValN(ChangePrinter::ColourDiffVerbose, "cdiff",
"Display patch-like changes with color"),
- clEnumValN(ChangePrinter::PrintChangedColourDiffQuiet, "cdiff-quiet",
+ clEnumValN(ChangePrinter::ColourDiffQuiet, "cdiff-quiet",
"Display patch-like changes in quiet mode with color"),
- clEnumValN(ChangePrinter::PrintChangedDotCfgVerbose, "dot-cfg",
+ clEnumValN(ChangePrinter::DotCfgVerbose, "dot-cfg",
"Create a website with graphical changes"),
- clEnumValN(ChangePrinter::PrintChangedDotCfgQuiet, "dot-cfg-quiet",
+ clEnumValN(ChangePrinter::DotCfgQuiet, "dot-cfg-quiet",
"Create a website with graphical changes in quiet mode"),
// Sentinel value for unspecified option.
- clEnumValN(ChangePrinter::PrintChangedVerbose, "", "")));
+ clEnumValN(ChangePrinter::Verbose, "", "")));
// An option that supports the -print-changed option. See
// the description for -print-changed for an explanation of the use
@@ -596,8 +595,8 @@ void TextChangeReporter<T>::handleIgnored(StringRef PassID, std::string &Name) {
IRChangedPrinter::~IRChangedPrinter() = default;
void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
- if (PrintChanged == ChangePrinter::PrintChangedVerbose ||
- PrintChanged == ChangePrinter::PrintChangedQuiet)
+ if (PrintChanged == ChangePrinter::Verbose ||
+ PrintChanged == ChangePrinter::Quiet)
TextChangeReporter<std::string>::registerRequiredCallbacks(PIC);
}
@@ -940,7 +939,22 @@ void PrintPassInstrumentation::registerCallbacks(
if (isSpecialPass(PassID, SpecialPasses))
return;
- print() << "Running pass: " << PassID << " on " << getIRName(IR) << "\n";
+ auto &OS = print();
+ OS << "Running pass: " << PassID << " on " << getIRName(IR);
+ if (any_isa<const Function *>(IR)) {
+ unsigned Count = any_cast<const Function *>(IR)->getInstructionCount();
+ OS << " (" << Count << " instruction";
+ if (Count != 1)
+ OS << 's';
+ OS << ')';
+ } else if (any_isa<const LazyCallGraph::SCC *>(IR)) {
+ int Count = any_cast<const LazyCallGraph::SCC *>(IR)->size();
+ OS << " (" << Count << " node";
+ if (Count != 1)
+ OS << 's';
+ OS << ')';
+ }
+ OS << "\n";
Indent += 2;
});
PIC.registerAfterPassCallback(
@@ -1260,10 +1274,10 @@ void InLineChangePrinter::handleFunctionCompare(
}
void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
- if (PrintChanged == ChangePrinter::PrintChangedDiffVerbose ||
- PrintChanged == ChangePrinter::PrintChangedDiffQuiet ||
- PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose ||
- PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet)
+ if (PrintChanged == ChangePrinter::DiffVerbose ||
+ PrintChanged == ChangePrinter::DiffQuiet ||
+ PrintChanged == ChangePrinter::ColourDiffVerbose ||
+ PrintChanged == ChangePrinter::ColourDiffQuiet)
TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC);
}
@@ -2096,8 +2110,8 @@ DotCfgChangeReporter::~DotCfgChangeReporter() {
void DotCfgChangeReporter::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
- if ((PrintChanged == ChangePrinter::PrintChangedDotCfgVerbose ||
- PrintChanged == ChangePrinter::PrintChangedDotCfgQuiet)) {
+ if (PrintChanged == ChangePrinter::DotCfgVerbose ||
+ PrintChanged == ChangePrinter::DotCfgQuiet) {
SmallString<128> OutputDir;
sys::fs::expand_tilde(DotCfgDir, OutputDir);
sys::fs::make_absolute(OutputDir);
@@ -2114,14 +2128,12 @@ void DotCfgChangeReporter::registerCallbacks(
StandardInstrumentations::StandardInstrumentations(
bool DebugLogging, bool VerifyEach, PrintPassOptions PrintPassOpts)
: PrintPass(DebugLogging, PrintPassOpts), OptNone(DebugLogging),
- PrintChangedIR(PrintChanged == ChangePrinter::PrintChangedVerbose),
- PrintChangedDiff(
- PrintChanged == ChangePrinter::PrintChangedDiffVerbose ||
- PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose,
- PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose ||
- PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet),
- WebsiteChangeReporter(PrintChanged ==
- ChangePrinter::PrintChangedDotCfgVerbose),
+ PrintChangedIR(PrintChanged == ChangePrinter::Verbose),
+ PrintChangedDiff(PrintChanged == ChangePrinter::DiffVerbose ||
+ PrintChanged == ChangePrinter::ColourDiffVerbose,
+ PrintChanged == ChangePrinter::ColourDiffVerbose ||
+ PrintChanged == ChangePrinter::ColourDiffQuiet),
+ WebsiteChangeReporter(PrintChanged == ChangePrinter::DotCfgVerbose),
Verify(DebugLogging), VerifyEach(VerifyEach) {}
PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter =
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index f4f13bafb233..8c1eadbe8271 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -349,7 +349,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
StringRef CompilationDir) {
auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename);
if (Error E = ProfileReaderOrErr.takeError())
- return std::move(E);
+ return createFileError(ProfileFilename, std::move(E));
auto ProfileReader = std::move(ProfileReaderOrErr.get());
auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
bool DataFound = false;
@@ -358,7 +358,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN(
File.value(), /*IsText=*/false, /*RequiresNullTerminator=*/false);
if (std::error_code EC = CovMappingBufOrErr.getError())
- return errorCodeToError(EC);
+ return createFileError(File.value(), errorCodeToError(EC));
StringRef Arch = Arches.empty() ? StringRef() : Arches[File.index()];
MemoryBufferRef CovMappingBufRef =
CovMappingBufOrErr.get()->getMemBufferRef();
@@ -368,7 +368,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
if (Error E = CoverageReadersOrErr.takeError()) {
E = handleMaybeNoDataFoundError(std::move(E));
if (E)
- return std::move(E);
+ return createFileError(File.value(), std::move(E));
// E == success (originally a no_data_found error).
continue;
}
@@ -378,12 +378,14 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
Readers.push_back(std::move(Reader));
DataFound |= !Readers.empty();
if (Error E = loadFromReaders(Readers, *ProfileReader, *Coverage))
- return std::move(E);
+ return createFileError(File.value(), std::move(E));
}
// If no readers were created, either no objects were provided or none of them
// had coverage data. Return an error in the latter case.
if (!DataFound && !ObjectFilenames.empty())
- return make_error<CoverageMapError>(coveragemap_error::no_data_found);
+ return createFileError(
+ join(ObjectFilenames.begin(), ObjectFilenames.end(), ", "),
+ make_error<CoverageMapError>(coveragemap_error::no_data_found));
return std::move(Coverage);
}
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index ee8989979a26..23804ce604c4 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1026,20 +1026,50 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
return *Symtab;
}
-Expected<InstrProfRecord>
-IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
- uint64_t FuncHash) {
+Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
+ StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) {
ArrayRef<NamedInstrProfRecord> Data;
+ uint64_t FuncSum = 0;
Error Err = Remapper->getRecords(FuncName, Data);
if (Err)
return std::move(Err);
// Found it. Look for counters with the right hash.
+
+ // A flag to indicate whether any record has the same profile kind
+ // (i.e. CS vs. non-CS) as the queried hash.
+ bool CSBitMatch = false;
+ auto getFuncSum = [](const std::vector<uint64_t> &Counts) {
+ uint64_t ValueSum = 0;
+ for (unsigned I = 0, S = Counts.size(); I < S; I++) {
+ uint64_t CountValue = Counts[I];
+ if (CountValue == (uint64_t)-1)
+ continue;
+ // Handle overflow -- if that happens, return max.
+ if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum)
+ return std::numeric_limits<uint64_t>::max();
+ ValueSum += CountValue;
+ }
+ return ValueSum;
+ };
+
for (const NamedInstrProfRecord &I : Data) {
// Check for a match and fill the vector if there is one.
if (I.Hash == FuncHash)
return std::move(I);
+ if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) ==
+ NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) {
+ CSBitMatch = true;
+ if (MismatchedFuncSum == nullptr)
+ continue;
+ FuncSum = std::max(FuncSum, getFuncSum(I.Counts));
+ }
+ }
+ if (CSBitMatch) {
+ if (MismatchedFuncSum != nullptr)
+ *MismatchedFuncSum = FuncSum;
+ return error(instrprof_error::hash_mismatch);
}
- return error(instrprof_error::hash_mismatch);
+ return error(instrprof_error::unknown_function);
}
Expected<memprof::MemProfRecord>
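Note: after this change, a name that resolves only to records of the same profile kind but a different hash reports hash_mismatch (optionally surfacing the largest counter sum among those records), while a name with no same-kind records reports unknown_function. A hedged sketch of a caller using the new out-parameter; Reader, FuncName and FuncHash are placeholders:

uint64_t MismatchedFuncSum = 0;
Expected<InstrProfRecord> Rec =
    Reader.getInstrProfRecord(FuncName, FuncHash, &MismatchedFuncSum);
if (!Rec) {
  // On hash_mismatch, MismatchedFuncSum holds the maximum counter sum seen
  // among records whose CS flag matched but whose hash did not.
  logAllUnhandledErrors(Rec.takeError(), errs(), "profile lookup: ");
}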
diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp
index 5ba011bac4e9..187d5be00dae 100644
--- a/llvm/lib/Support/AddressRanges.cpp
+++ b/llvm/lib/Support/AddressRanges.cpp
@@ -12,48 +12,59 @@
using namespace llvm;
-void AddressRanges::insert(AddressRange Range) {
+AddressRanges::Collection::const_iterator
+AddressRanges::insert(AddressRange Range) {
if (Range.size() == 0)
- return;
+ return Ranges.end();
auto It = llvm::upper_bound(Ranges, Range);
auto It2 = It;
- while (It2 != Ranges.end() && It2->start() < Range.end())
+ while (It2 != Ranges.end() && It2->start() <= Range.end())
++It2;
if (It != It2) {
- Range = {Range.start(), std::max(Range.end(), It2[-1].end())};
+ Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())};
It = Ranges.erase(It, It2);
}
- if (It != Ranges.begin() && Range.start() < It[-1].end())
- It[-1] = {It[-1].start(), std::max(It[-1].end(), Range.end())};
- else
- Ranges.insert(It, Range);
+ if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) {
+ --It;
+ *It = {It->start(), std::max(It->end(), Range.end())};
+ return It;
+ }
+
+ return Ranges.insert(It, Range);
}
-bool AddressRanges::contains(uint64_t Addr) const {
+AddressRanges::Collection::const_iterator
+AddressRanges::find(uint64_t Addr) const {
auto It = std::partition_point(
Ranges.begin(), Ranges.end(),
[=](const AddressRange &R) { return R.start() <= Addr; });
- return It != Ranges.begin() && Addr < It[-1].end();
+
+ if (It == Ranges.begin())
+ return Ranges.end();
+
+ --It;
+ if (Addr >= It->end())
+ return Ranges.end();
+
+ return It;
}
-bool AddressRanges::contains(AddressRange Range) const {
+AddressRanges::Collection::const_iterator
+AddressRanges::find(AddressRange Range) const {
if (Range.size() == 0)
- return false;
+ return Ranges.end();
+
auto It = std::partition_point(
Ranges.begin(), Ranges.end(),
[=](const AddressRange &R) { return R.start() <= Range.start(); });
+
if (It == Ranges.begin())
- return false;
- return Range.end() <= It[-1].end();
-}
+ return Ranges.end();
-Optional<AddressRange>
-AddressRanges::getRangeThatContains(uint64_t Addr) const {
- auto It = std::partition_point(
- Ranges.begin(), Ranges.end(),
- [=](const AddressRange &R) { return R.start() <= Addr; });
- if (It != Ranges.begin() && Addr < It[-1].end())
- return It[-1];
- return llvm::None;
+ --It;
+ if (Range.end() > It->end())
+ return Ranges.end();
+
+ return It;
}
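Note: insert() and find() now return const_iterators into the underlying collection instead of void/bool, and touching ranges coalesce because the comparisons were relaxed from < to <=. A minimal sketch, assuming the class also exposes end() over the same collection (as its use of Ranges.end() as the not-found sentinel implies) and omitting includes:

AddressRanges Ranges;
Ranges.insert({0x1000, 0x2000});
// Touching ranges merge: the returned iterator points at [0x1000, 0x3000).
auto MergedIt = Ranges.insert({0x2000, 0x3000});
// find() yields an iterator to the containing range, or the end iterator.
auto HitIt = Ranges.find(0x2fff);  // inside the merged range
auto MissIt = Ranges.find(0x3000); // == end(), range ends are exclusive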
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e3df172ef113..5e7d63165130 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -2382,7 +2382,7 @@ protected:
for (size_t I = 0, E = Opts.size(); I != E; ++I) {
Option *Opt = Opts[I].second;
for (auto &Cat : Opt->Categories) {
- assert(find(SortedCategories, Cat) != SortedCategories.end() &&
+ assert(llvm::is_contained(SortedCategories, Cat) &&
"Option has an unregistered category");
CategorizedOptions[Cat].push_back(Opt);
}
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index 21191972fb8b..e8fb715aa770 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -20,6 +20,9 @@
#if LLVM_ENABLE_ZLIB
#include <zlib.h>
#endif
+#if LLVM_ENABLE_ZSTD
+#include <zstd.h>
+#endif
using namespace llvm;
using namespace llvm::compression;
@@ -100,3 +103,65 @@ Error zlib::uncompress(ArrayRef<uint8_t> Input,
llvm_unreachable("zlib::uncompress is unavailable");
}
#endif
+
+#if LLVM_ENABLE_ZSTD
+
+bool zstd::isAvailable() { return true; }
+
+void zstd::compress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+ unsigned long CompressedBufferSize = ::ZSTD_compressBound(Input.size());
+ CompressedBuffer.resize_for_overwrite(CompressedBufferSize);
+ unsigned long CompressedSize =
+ ::ZSTD_compress((char *)CompressedBuffer.data(), CompressedBufferSize,
+ (const char *)Input.data(), Input.size(), Level);
+ if (ZSTD_isError(CompressedSize))
+ report_bad_alloc_error("Allocation failed");
+ // Tell MemorySanitizer that the zstd output buffer is fully initialized.
+ // This avoids a false report when running LLVM with uninstrumented zstd.
+ __msan_unpoison(CompressedBuffer.data(), CompressedSize);
+ if (CompressedSize < CompressedBuffer.size())
+ CompressedBuffer.truncate(CompressedSize);
+}
+
+Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
+ size_t &UncompressedSize) {
+ const size_t Res =
+ ::ZSTD_decompress(UncompressedBuffer, UncompressedSize,
+ (const uint8_t *)Input.data(), Input.size());
+ UncompressedSize = Res;
+ // Tell MemorySanitizer that the zstd output buffer is fully initialized.
+ // This avoids a false report when running LLVM with uninstrumented zstd.
+ __msan_unpoison(UncompressedBuffer, UncompressedSize);
+ return ZSTD_isError(Res) ? make_error<StringError>(ZSTD_getErrorName(Res),
+ inconvertibleErrorCode())
+ : Error::success();
+}
+
+Error zstd::uncompress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &UncompressedBuffer,
+ size_t UncompressedSize) {
+ UncompressedBuffer.resize_for_overwrite(UncompressedSize);
+ Error E =
+ zstd::uncompress(Input, UncompressedBuffer.data(), UncompressedSize);
+ if (UncompressedSize < UncompressedBuffer.size())
+ UncompressedBuffer.truncate(UncompressedSize);
+ return E;
+}
+
+#else
+bool zstd::isAvailable() { return false; }
+void zstd::compress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+ llvm_unreachable("zstd::compress is unavailable");
+}
+Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
+ size_t &UncompressedSize) {
+ llvm_unreachable("zstd::uncompress is unavailable");
+}
+Error zstd::uncompress(ArrayRef<uint8_t> Input,
+ SmallVectorImpl<uint8_t> &UncompressedBuffer,
+ size_t UncompressedSize) {
+ llvm_unreachable("zstd::uncompress is unavailable");
+}
+#endif
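Note: a hedged round-trip sketch of the new zstd wrappers; it only applies when LLVM is configured with LLVM_ENABLE_ZSTD, includes are omitted, and the explicit level 5 merely stands in for whatever default the header provides:

static llvm::Error roundTripZstd() {
  using namespace llvm;
  using namespace llvm::compression;
  StringRef Payload = "some section bytes";
  SmallVector<uint8_t, 0> Compressed;
  zstd::compress(arrayRefFromStringRef(Payload), Compressed, /*Level=*/5);
  SmallVector<uint8_t, 0> Decompressed;
  // The caller must know (or have stored) the uncompressed size up front.
  return zstd::uncompress(Compressed, Decompressed, Payload.size());
}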
diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp
index 69f39386798c..35486674e02f 100644
--- a/llvm/lib/Support/DivisionByConstantInfo.cpp
+++ b/llvm/lib/Support/DivisionByConstantInfo.cpp
@@ -1,4 +1,4 @@
-//===----- DivisonByConstantInfo.cpp - division by constant -*- C++ -*-----===//
+//===----- DivisionByConstantInfo.cpp - division by constant -*- C++ -*----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -62,11 +62,11 @@ SignedDivisionByConstantInfo SignedDivisionByConstantInfo::get(const APInt &D) {
/// S. Warren, Jr., chapter 10.
/// LeadingZeros can be used to simplify the calculation if the upper bits
/// of the divided value are known zero.
-UnsignedDivisonByConstantInfo
-UnsignedDivisonByConstantInfo::get(const APInt &D, unsigned LeadingZeros) {
+UnsignedDivisionByConstantInfo
+UnsignedDivisionByConstantInfo::get(const APInt &D, unsigned LeadingZeros) {
unsigned P;
APInt NC, Delta, Q1, R1, Q2, R2;
- struct UnsignedDivisonByConstantInfo Retval;
+ struct UnsignedDivisionByConstantInfo Retval;
Retval.IsAdd = false; // initialize "add" indicator
APInt AllOnes = APInt::getAllOnes(D.getBitWidth()).lshr(LeadingZeros);
APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth());
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 7fe04af4696b..0fe286d239d4 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -77,6 +77,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
{"zkt", RISCVExtensionVersion{1, 0}},
{"zk", RISCVExtensionVersion{1, 0}},
+ {"zmmul", RISCVExtensionVersion{1, 0}},
+
{"v", RISCVExtensionVersion{1, 0}},
{"zvl32b", RISCVExtensionVersion{1, 0}},
{"zvl64b", RISCVExtensionVersion{1, 0}},
diff --git a/llvm/lib/TableGen/JSONBackend.cpp b/llvm/lib/TableGen/JSONBackend.cpp
index e38903910275..6dc466e29df3 100644
--- a/llvm/lib/TableGen/JSONBackend.cpp
+++ b/llvm/lib/TableGen/JSONBackend.cpp
@@ -129,13 +129,13 @@ void JSONEmitter::run(raw_ostream &OS) {
// construct the array for each one.
std::map<std::string, json::Array> instance_lists;
for (const auto &C : Records.getClasses()) {
- auto &Name = C.second->getNameInitAsString();
+ const auto Name = C.second->getNameInitAsString();
(void)instance_lists[Name];
}
// Main iteration over the defs.
for (const auto &D : Records.getDefs()) {
- auto &Name = D.second->getNameInitAsString();
+ const auto Name = D.second->getNameInitAsString();
auto &Def = *D.second;
json::Object obj;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 75a99e95541a..6b899a049e6b 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -2424,6 +2424,14 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
if (PrintSem) OS << ";\n";
}
+void Record::updateClassLoc(SMLoc Loc) {
+ assert(Locs.size() == 1);
+ ForwardDeclarationLocs.push_back(Locs.front());
+
+ Locs.clear();
+ Locs.push_back(Loc);
+}
+
void Record::checkName() {
// Ensure the record name has string type.
const TypedInit *TypedName = cast<const TypedInit>(Name);
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index acf93dc3d792..aab1802c5348 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -3391,6 +3391,8 @@ bool TGParser::ParseClass() {
!CurRec->getTemplateArgs().empty())
return TokError("Class '" + CurRec->getNameInitAsString() +
"' already defined");
+
+ CurRec->updateClassLoc(Lex.getLoc());
} else {
// If this is the first reference to this class, create and add it.
auto NewRec =
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index b332e9dcb176..8fb5d49e2121 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -216,7 +216,7 @@ def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
"IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;
def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address",
- "IsStoreAddressAscend", "false",
+ "IsStoreAddressAscend", "true",
"Schedule vector stores by ascending address">;
def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 82fe5772c99d..00621b84d2f2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -69,6 +69,7 @@ public:
bool tryMLAV64LaneV128(SDNode *N);
bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
+ bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
@@ -893,6 +894,30 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
return isWorthFolding(N);
}
+/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
+/// operand is referred to by instructions that have an SP operand.
+bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
+ SDValue &Shift) {
+ unsigned ShiftVal = 0;
+ AArch64_AM::ShiftExtendType Ext;
+
+ if (N.getOpcode() != ISD::SHL)
+ return false;
+
+ ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!CSD)
+ return false;
+ ShiftVal = CSD->getZExtValue();
+ if (ShiftVal > 4)
+ return false;
+
+ Ext = AArch64_AM::UXTX;
+ Reg = N.getOperand(0);
+ Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
+ MVT::i32);
+ return isWorthFolding(N);
+}
+
/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
@@ -4049,6 +4074,24 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case Intrinsic::swift_async_context_addr: {
+ SDLoc DL(Node);
+ SDValue Chain = Node->getOperand(0);
+ SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
+ SDValue Res = SDValue(
+ CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
+ CurDAG->getTargetConstant(8, DL, MVT::i32),
+ CurDAG->getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ ReplaceUses(SDValue(Node, 0), Res);
+ ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
+ CurDAG->RemoveDeadNode(Node);
+
+ auto &MF = CurDAG->getMachineFunction();
+ MF.getFrameInfo().setFrameAddressIsTaken(true);
+ MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
+ return;
+ }
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
@@ -4094,18 +4137,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
if (tryMULLV64LaneV128(IntNo, Node))
return;
break;
- case Intrinsic::swift_async_context_addr: {
- SDLoc DL(Node);
- CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64,
- CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
- AArch64::FP, MVT::i64),
- CurDAG->getTargetConstant(8, DL, MVT::i32),
- CurDAG->getTargetConstant(0, DL, MVT::i32));
- auto &MF = CurDAG->getMachineFunction();
- MF.getFrameInfo().setFrameAddressIsTaken(true);
- MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
- return;
- }
}
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 447ad10ddf22..e070ce2efa6b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -521,6 +521,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
+ setOperationAction(ISD::PARITY, MVT::i64, Custom);
+ setOperationAction(ISD::PARITY, MVT::i128, Custom);
+
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
@@ -5463,7 +5466,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
+ case ISD::PARITY:
+ return LowerCTPOP_PARITY(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
@@ -7783,7 +7787,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
return BitCast(VT, BSP, DAG);
}
-SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
+ SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
@@ -7791,6 +7796,8 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->hasNEON())
return SDValue();
+ bool IsParity = Op.getOpcode() == ISD::PARITY;
+
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
@@ -7813,6 +7820,10 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ if (IsParity)
+ UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+ DAG.getConstant(1, DL, MVT::i32));
+
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
@@ -7824,9 +7835,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
+ if (IsParity)
+ UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
+ DAG.getConstant(1, DL, MVT::i32));
+
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
+ assert(!IsParity && "ISD::PARITY of vector types not supported");
+
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
@@ -11811,6 +11828,12 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
+bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M,
+ EVT VT) const {
+ // Just delegate to the generic legality check; clear masks aren't special.
+ return isShuffleMaskLegal(M, VT);
+}
+
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
@@ -11969,6 +11992,11 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
+ case AArch64CC::LE:
+ if (!NoNans)
+ return SDValue();
+ // If we ignore NaNs then we can use the LS implementation.
+ LLVM_FALLTHROUGH;
case AArch64CC::LS:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
@@ -12073,7 +12101,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
- bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
+ bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
@@ -13587,21 +13615,50 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
- N = N->getOperand(0).getNode();
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
+ SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
- // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
- // it with shift to let it be lowered to UBFX.
- if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
- isa<ConstantSDNode>(N->getOperand(1))) {
- uint64_t TruncMask = N->getConstantOperandVal(1);
+
+ // If ShiftLHS is an unsigned bit extraction ((x >> C) & mask), do not
+ // combine it with shift 'N' so that it can still be lowered to UBFX.
+ if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
+ isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
+ uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
if (isMask_64(TruncMask) &&
- N->getOperand(0).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+ ShiftLHS.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1)))
return false;
}
return true;
}
+bool AArch64TargetLowering::isDesirableToCommuteXorWithShift(
+ const SDNode *N) const {
+ assert(N->getOpcode() == ISD::XOR &&
+ (N->getOperand(0).getOpcode() == ISD::SHL ||
+ N->getOperand(0).getOpcode() == ISD::SRL) &&
+ "Expected XOR(SHIFT) pattern");
+
+ // Only commute if the entire NOT mask is a hidden shifted mask.
+ auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
+ if (XorC && ShiftC) {
+ unsigned MaskIdx, MaskLen;
+ if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
+ unsigned ShiftAmt = ShiftC->getZExtValue();
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
+ return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
+ }
+ }
+
+ return false;
+}
+
bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&
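Note: a worked instance of the check above for i32 and (xor (shl x, 8), C). For C = 0xffffff00, isShiftedMask gives MaskIdx = 8 and MaskLen = 24; since 8 == ShiftAmt and 24 == 32 - 8, the hook returns true and the xor is commuted. For C = 0x0000ff00 (MaskIdx = 8, MaskLen = 8) the MaskLen test fails, so the bitfield-extraction-friendly form is left alone.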
@@ -19221,6 +19278,41 @@ static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
}
+static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ SDValue Insert = N->getOperand(0);
+ if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
+ return SDValue();
+
+ if (!Insert.getOperand(0).isUndef())
+ return SDValue();
+
+ uint64_t IdxInsert = Insert.getConstantOperandVal(2);
+ uint64_t IdxDupLane = N->getConstantOperandVal(1);
+ if (IdxInsert != IdxDupLane)
+ return SDValue();
+
+ SDValue Bitcast = Insert.getOperand(1);
+ if (Bitcast.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ SDValue Subvec = Bitcast.getOperand(0);
+ EVT SubvecVT = Subvec.getValueType();
+ if (!SubvecVT.is128BitVector())
+ return SDValue();
+ EVT NewSubvecVT =
+ getPackedSVEVectorVT(Subvec.getValueType().getVectorElementType());
+
+ SDLoc DL(N);
+ SDValue NewInsert =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT,
+ DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
+ SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT,
+ NewInsert, N->getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -19307,6 +19399,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performDUPCombine(N, DCI);
+ case AArch64ISD::DUPLANE128:
+ return performDupLane128Combine(N, DAG);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
@@ -19981,7 +20075,8 @@ void AArch64TargetLowering::ReplaceNodeResults(
return;
case ISD::CTPOP:
- if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
+ case ISD::PARITY:
+ if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e02b5e56fd2e..1ba2e2f315ec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -549,6 +549,10 @@ public:
/// should be stack expanded.
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
+ /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
+ /// shuffle mask can be codegen'd directly.
+ bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
+
/// Return the ISD::SETCC ValueType.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -653,6 +657,9 @@ public:
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
+ /// Return true if it is profitable to commute the XOR through the shift,
+ /// i.e. when the XOR constant is a shifted mask aligned with the shift.
+ bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
+
/// Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
@@ -995,7 +1002,7 @@ private:
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 02fa36a1df4b..e70d304f37b9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1168,6 +1168,8 @@ def gi_arith_extended_reg32to64_i64 :
GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
GIComplexPatternEquiv<arith_extended_reg32to64_i64>;
+def arith_uxtx : ComplexPattern<i64, 2, "SelectArithUXTXRegister", []>;
+
// Floating-point immediate.
def fpimm16XForm : SDNodeXForm<fpimm, [{
@@ -1234,6 +1236,10 @@ def fpimm0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.0);
}]>;
+def fpimm_minus0 : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(-0.0);
+}]>;
+
def fpimm_half : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.5);
}]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d444223e4494..a7b7e5270888 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1691,6 +1691,11 @@ def : InstAlias<"mov $dst, $src",
defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
+def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
+ return N->getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
+}]>;
+
// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
(SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
@@ -1709,6 +1714,8 @@ def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
(SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
(SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
+def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
+ (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
}
// Because of the immediate format for add/sub-imm instructions, the
@@ -5293,6 +5300,9 @@ def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
+// Prioritize ADDHN and SUBHN over UZP2.
+let AddedComplexity = 10 in {
+
// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
(ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
@@ -5343,6 +5353,8 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd),
(SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
+} // AddedComplexity = 10
+
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------
@@ -5409,6 +5421,19 @@ def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
(v2i32 (trunc (v2i64 V128:$Vm))))),
(UZP1v4i32 V128:$Vn, V128:$Vm)>;
+def : Pat<(v16i8 (concat_vectors
+ (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
+ (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
+ (UZP2v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors
+ (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
+ (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
+ (UZP2v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors
+ (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
+ (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
+ (UZP2v4i32 V128:$Vn, V128:$Vm)>;
+
//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
index 6c8845ee8598..79866c9b0a05 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
@@ -22,8 +22,8 @@ static bool needReorderStoreMI(const MachineInstr *MI) {
return false;
case AArch64::STURQi:
case AArch64::STRQui:
- if (MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend())
- return false;
+ if (!MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend())
+ return false;
LLVM_FALLTHROUGH;
case AArch64::STPQi:
return AArch64InstrInfo::getLdStOffsetOp(*MI).isImm();
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c66f9cfd9c22..4032c4667bc7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -278,10 +278,18 @@ def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch
def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>;
def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>;
-def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
-def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
-def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
-def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
+def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3,
+ [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>;
+def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
+def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
+def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
+
+def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+ [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3),
+ (AArch64fadda_p_node (SVEAllActive), node:$op2,
+ (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))),
+ (AArch64fadda_p_node (SVEAllActive), node:$op2,
+ (vselect node:$op1, node:$op3, (splat_vector (f64 fpimm_minus0))))]>;
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
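Note: -0.0 is the identity of IEEE-754 addition (x + -0.0 == x for every x, including x == +0.0), so substituting a splat of -0.0 into the inactive lanes lets an all-active FADDA compute the same result as the predicated reduction; that is what the extra PatFrags alternatives above are matching.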
@@ -447,6 +455,16 @@ let Predicates = [HasSVEorSME] in {
defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>;
+ // zext(cmpeq(x, splat(0))) -> cnot(x)
+ def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))),
+ (CNOT_ZPmZ_B $Op2, $Pg, $Op2)>;
+ def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive):$Pg), nxv8i16:$Op2, (SVEDup0), SETEQ)))),
+ (CNOT_ZPmZ_H $Op2, $Pg, $Op2)>;
+ def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive):$Pg), nxv4i32:$Op2, (SVEDup0), SETEQ)))),
+ (CNOT_ZPmZ_S $Op2, $Pg, $Op2)>;
+ def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)))),
+ (CNOT_ZPmZ_D $Op2, $Pg, $Op2)>;
+
defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
@@ -857,6 +875,16 @@ let Predicates = [HasSVEorSME] in {
defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>;
+ let AddedComplexity = 1 in {
+ class LD1RQPat<ValueType vt1, ValueType vt2, SDPatternOperator op, Instruction load_instr, Instruction ptrue> :
+ Pat<(vt1 (op (vt1 (vector_insert_subvec (vt1 undef), (vt2 (load GPR64sp:$Xn)), (i64 0))), (i64 0))),
+ (load_instr (ptrue 31), GPR64sp:$Xn, 0)>;
+ }
+ def : LD1RQPat<nxv16i8, v16i8, AArch64duplane128, LD1RQ_B_IMM, PTRUE_B>;
+ def : LD1RQPat<nxv8i16, v8i16, AArch64duplane128, LD1RQ_H_IMM, PTRUE_H>;
+ def : LD1RQPat<nxv4i32, v4i32, AArch64duplane128, LD1RQ_W_IMM, PTRUE_S>;
+ def : LD1RQPat<nxv2i64, v2i64, AArch64duplane128, LD1RQ_D_IMM, PTRUE_D>;
+
// continuous load with reg+reg addressing.
defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 3f9795f5198b..47e4c6589c26 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -128,7 +128,7 @@ static cl::opt<bool>
static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
- cl::init(false));
+ cl::init(true));
static cl::opt<bool>
BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
@@ -563,17 +563,6 @@ void AArch64PassConfig::addIRPasses() {
addPass(createFalkorMarkStridedAccessesPass());
}
- TargetPassConfig::addIRPasses();
-
- addPass(createAArch64StackTaggingPass(
- /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
-
- // Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None) {
- addPass(createInterleavedLoadCombinePass());
- addPass(createInterleavedAccessPass());
- }
-
if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
@@ -587,6 +576,17 @@ void AArch64PassConfig::addIRPasses() {
addPass(createLICMPass());
}
+ TargetPassConfig::addIRPasses();
+
+ addPass(createAArch64StackTaggingPass(
+ /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
+
+ // Match interleaved memory accesses to ldN/stN intrinsics.
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ addPass(createInterleavedLoadCombinePass());
+ addPass(createInterleavedAccessPass());
+ }
+
// Add Control Flow Guard checks.
if (TM->getTargetTriple().isOSWindows())
addPass(createCFGuardCheckPass());
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 274a025e82a0..66617393c9ae 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -37,6 +38,74 @@ static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10),
static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead",
cl::init(10), cl::Hidden);
+class TailFoldingKind {
+private:
+ uint8_t Bits = 0; // Currently defaults to disabled.
+
+public:
+ enum TailFoldingOpts {
+ TFDisabled = 0x0,
+ TFReductions = 0x01,
+ TFRecurrences = 0x02,
+ TFSimple = 0x80,
+ TFAll = TFReductions | TFRecurrences | TFSimple
+ };
+
+ void operator=(const std::string &Val) {
+ if (Val.empty())
+ return;
+ SmallVector<StringRef, 6> TailFoldTypes;
+ StringRef(Val).split(TailFoldTypes, '+', -1, false);
+ for (auto TailFoldType : TailFoldTypes) {
+ if (TailFoldType == "disabled")
+ Bits = 0;
+ else if (TailFoldType == "all")
+ Bits = TFAll;
+ else if (TailFoldType == "default")
+ Bits = 0; // Currently defaults to never tail-folding.
+ else if (TailFoldType == "simple")
+ add(TFSimple);
+ else if (TailFoldType == "reductions")
+ add(TFReductions);
+ else if (TailFoldType == "recurrences")
+ add(TFRecurrences);
+ else if (TailFoldType == "noreductions")
+ remove(TFReductions);
+ else if (TailFoldType == "norecurrences")
+ remove(TFRecurrences);
+ else {
+ errs()
+ << "invalid argument " << TailFoldType.str()
+ << " to -sve-tail-folding=; each element must be one of: disabled, "
+ "all, default, simple, reductions, noreductions, recurrences, "
+ "norecurrences\n";
+ }
+ }
+ }
+
+ operator uint8_t() const { return Bits; }
+
+ void add(uint8_t Flag) { Bits |= Flag; }
+ void remove(uint8_t Flag) { Bits &= ~Flag; }
+};
+
+TailFoldingKind TailFoldingKindLoc;
+
+cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding(
+ "sve-tail-folding",
+ cl::desc(
+ "Control the use of vectorisation using tail-folding for SVE:"
+ "\ndisabled No loop types will vectorize using tail-folding"
+ "\ndefault Uses the default tail-folding settings for the target "
+ "CPU"
+ "\nall All legal loop types will vectorize using tail-folding"
+ "\nsimple Use tail-folding for simple loops (not reductions or "
+ "recurrences)"
+ "\nreductions Use tail-folding for loops containing reductions"
+ "\nrecurrences Use tail-folding for loops containing first order "
+ "recurrences"),
+ cl::location(TailFoldingKindLoc));
+
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -2955,3 +3024,20 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
+
+bool AArch64TTIImpl::preferPredicateOverEpilogue(
+ Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
+ TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) {
+ if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled)
+ return false;
+
+ TailFoldingKind Required; // Defaults to 0.
+ if (LVL->getReductionVars().size())
+ Required.add(TailFoldingKind::TFReductions);
+ if (LVL->getFirstOrderRecurrences().size())
+ Required.add(TailFoldingKind::TFRecurrences);
+ if (!Required)
+ Required.add(TailFoldingKind::TFSimple);
+
+ return (TailFoldingKindLoc & Required) == Required;
+}
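The preferPredicateOverEpilogue hook added above reduces to a subset test on a small bitmask: the bits a loop requires (reductions, recurrences, or "simple" when it has neither) must all be enabled by the -sve-tail-folding setting. A minimal standalone sketch of that decision, assuming only the flag semantics shown in the patch (illustrative code, not part of the LLVM sources):

    // Sketch of the -sve-tail-folding subset check (assumption: standalone
    // illustration, not the LLVM implementation).
    #include <cstdint>
    #include <cstdio>

    enum TailFoldingOpts : uint8_t {
      TFDisabled    = 0x00,
      TFReductions  = 0x01,
      TFRecurrences = 0x02,
      TFSimple      = 0x80,
      TFAll         = TFReductions | TFRecurrences | TFSimple
    };

    // Returns true when every bit the loop requires is enabled by the user.
    static bool preferPredication(uint8_t Enabled, bool HasReductions,
                                  bool HasRecurrences) {
      if (Enabled == TFDisabled)
        return false;
      uint8_t Required = 0;
      if (HasReductions)
        Required |= TFReductions;
      if (HasRecurrences)
        Required |= TFRecurrences;
      if (!Required) // A "simple" loop: neither reductions nor recurrences.
        Required = TFSimple;
      return (Enabled & Required) == Required;
    }

    int main() {
      // With "reductions" enabled: a reduction loop folds, a plain loop does not.
      std::printf("%d %d\n", preferPredication(TFReductions, true, false),
                  preferPredication(TFReductions, false, false));
      return 0;
    }

With -sve-tail-folding=reductions, a reduction loop prefers predication while a plain loop falls back to an epilogue, which matches the subset check performed by the new hook.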
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 59ec91843266..2231f8705998 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -340,6 +340,11 @@ public:
return PredicationStyle::None;
}
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ LoopVectorizationLegality *LVL);
+
bool supportsScalableVectors() const { return ST->hasSVE(); }
bool enableScalableVectorization() const { return ST->hasSVE(); }
@@ -347,6 +352,11 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ return ST->hasSVE();
+ }
+
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index f129bfe11e4d..3fe3b2a69855 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -231,7 +231,70 @@ void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
{codeview::RegisterId::ARM64_Q29, AArch64::Q29},
{codeview::RegisterId::ARM64_Q30, AArch64::Q30},
{codeview::RegisterId::ARM64_Q31, AArch64::Q31},
-
+ {codeview::RegisterId::ARM64_B0, AArch64::B0},
+ {codeview::RegisterId::ARM64_B1, AArch64::B1},
+ {codeview::RegisterId::ARM64_B2, AArch64::B2},
+ {codeview::RegisterId::ARM64_B3, AArch64::B3},
+ {codeview::RegisterId::ARM64_B4, AArch64::B4},
+ {codeview::RegisterId::ARM64_B5, AArch64::B5},
+ {codeview::RegisterId::ARM64_B6, AArch64::B6},
+ {codeview::RegisterId::ARM64_B7, AArch64::B7},
+ {codeview::RegisterId::ARM64_B8, AArch64::B8},
+ {codeview::RegisterId::ARM64_B9, AArch64::B9},
+ {codeview::RegisterId::ARM64_B10, AArch64::B10},
+ {codeview::RegisterId::ARM64_B11, AArch64::B11},
+ {codeview::RegisterId::ARM64_B12, AArch64::B12},
+ {codeview::RegisterId::ARM64_B13, AArch64::B13},
+ {codeview::RegisterId::ARM64_B14, AArch64::B14},
+ {codeview::RegisterId::ARM64_B15, AArch64::B15},
+ {codeview::RegisterId::ARM64_B16, AArch64::B16},
+ {codeview::RegisterId::ARM64_B17, AArch64::B17},
+ {codeview::RegisterId::ARM64_B18, AArch64::B18},
+ {codeview::RegisterId::ARM64_B19, AArch64::B19},
+ {codeview::RegisterId::ARM64_B20, AArch64::B20},
+ {codeview::RegisterId::ARM64_B21, AArch64::B21},
+ {codeview::RegisterId::ARM64_B22, AArch64::B22},
+ {codeview::RegisterId::ARM64_B23, AArch64::B23},
+ {codeview::RegisterId::ARM64_B24, AArch64::B24},
+ {codeview::RegisterId::ARM64_B25, AArch64::B25},
+ {codeview::RegisterId::ARM64_B26, AArch64::B26},
+ {codeview::RegisterId::ARM64_B27, AArch64::B27},
+ {codeview::RegisterId::ARM64_B28, AArch64::B28},
+ {codeview::RegisterId::ARM64_B29, AArch64::B29},
+ {codeview::RegisterId::ARM64_B30, AArch64::B30},
+ {codeview::RegisterId::ARM64_B31, AArch64::B31},
+ {codeview::RegisterId::ARM64_H0, AArch64::H0},
+ {codeview::RegisterId::ARM64_H1, AArch64::H1},
+ {codeview::RegisterId::ARM64_H2, AArch64::H2},
+ {codeview::RegisterId::ARM64_H3, AArch64::H3},
+ {codeview::RegisterId::ARM64_H4, AArch64::H4},
+ {codeview::RegisterId::ARM64_H5, AArch64::H5},
+ {codeview::RegisterId::ARM64_H6, AArch64::H6},
+ {codeview::RegisterId::ARM64_H7, AArch64::H7},
+ {codeview::RegisterId::ARM64_H8, AArch64::H8},
+ {codeview::RegisterId::ARM64_H9, AArch64::H9},
+ {codeview::RegisterId::ARM64_H10, AArch64::H10},
+ {codeview::RegisterId::ARM64_H11, AArch64::H11},
+ {codeview::RegisterId::ARM64_H12, AArch64::H12},
+ {codeview::RegisterId::ARM64_H13, AArch64::H13},
+ {codeview::RegisterId::ARM64_H14, AArch64::H14},
+ {codeview::RegisterId::ARM64_H15, AArch64::H15},
+ {codeview::RegisterId::ARM64_H16, AArch64::H16},
+ {codeview::RegisterId::ARM64_H17, AArch64::H17},
+ {codeview::RegisterId::ARM64_H18, AArch64::H18},
+ {codeview::RegisterId::ARM64_H19, AArch64::H19},
+ {codeview::RegisterId::ARM64_H20, AArch64::H20},
+ {codeview::RegisterId::ARM64_H21, AArch64::H21},
+ {codeview::RegisterId::ARM64_H22, AArch64::H22},
+ {codeview::RegisterId::ARM64_H23, AArch64::H23},
+ {codeview::RegisterId::ARM64_H24, AArch64::H24},
+ {codeview::RegisterId::ARM64_H25, AArch64::H25},
+ {codeview::RegisterId::ARM64_H26, AArch64::H26},
+ {codeview::RegisterId::ARM64_H27, AArch64::H27},
+ {codeview::RegisterId::ARM64_H28, AArch64::H28},
+ {codeview::RegisterId::ARM64_H29, AArch64::H29},
+ {codeview::RegisterId::ARM64_H30, AArch64::H30},
+ {codeview::RegisterId::ARM64_H31, AArch64::H31},
};
for (const auto &I : RegMap)
MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 48b5814cd482..2d6f1438e315 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -585,6 +585,12 @@ def FeatureMAIInsts : SubtargetFeature<"mai-insts",
"Has mAI instructions"
>;
+def FeatureFP8Insts : SubtargetFeature<"fp8-insts",
+ "HasFP8Insts",
+ "true",
+ "Has fp8 and bf8 instructions"
+>;
+
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"HasPkFmacF16Inst",
"true",
@@ -1124,6 +1130,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
Feature64BitDPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
+ FeatureFP8Insts,
FeaturePkFmacF16Inst,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
@@ -1265,11 +1272,14 @@ def FeatureISAVersion11_Common : FeatureSet<
FeaturePackedTID,
FeatureVcmpxPermlaneHazard]>;
-// Features for GFX 11.0.0 and 11.0.1
-def FeatureISAVersion11_0 : FeatureSet<
+def FeatureISAVersion11_0_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureUserSGPRInit16Bug])>;
+def FeatureISAVersion11_0_1 : FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [])>;
+
def FeatureISAVersion11_0_2 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureUserSGPRInit16Bug])>;
@@ -1704,6 +1714,9 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;
+def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
+ AssemblerPredicate<(all_of FeatureFP8Insts)>;
+
def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index d28f38e42430..d361e33995cf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -74,6 +74,7 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
<< " WorkGroupIDY: " << FI.second.WorkGroupIDY
<< " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
<< " WorkGroupInfo: " << FI.second.WorkGroupInfo
+ << " LDSKernelId: " << FI.second.LDSKernelId
<< " PrivateSegmentWaveByteOffset: "
<< FI.second.PrivateSegmentWaveByteOffset
<< " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
@@ -107,6 +108,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+ case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
+ return std::make_tuple(LDSKernelId ? &LDSKernelId : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return std::make_tuple(
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
@@ -162,6 +166,7 @@ constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);
+ AI.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15);
const unsigned Mask = 0x3ff;
AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index e9ed45d8cd14..f595e469f998 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -103,6 +103,7 @@ struct AMDGPUFunctionArgInfo {
KERNARG_SEGMENT_PTR = 3,
DISPATCH_ID = 4,
FLAT_SCRATCH_INIT = 5,
+ LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI
WORKGROUP_ID_X = 10,
WORKGROUP_ID_Y = 11,
WORKGROUP_ID_Z = 12,
@@ -128,6 +129,7 @@ struct AMDGPUFunctionArgInfo {
ArgDescriptor DispatchID;
ArgDescriptor FlatScratchInit;
ArgDescriptor PrivateSegmentSize;
+ ArgDescriptor LDSKernelId;
// System SGPRs in kernels.
ArgDescriptor WorkGroupIDX;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 57a4660bc1eb..13a65f1ad601 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -27,8 +27,10 @@
#include "SIMachineFunctionInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -415,6 +417,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
}
+ if (CurrentProgramInfo.DynamicCallStack) {
+ KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
+ }
+
return KernelCodeProperties;
}
@@ -506,6 +512,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
emitFunctionBody();
+ emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
+ STM.hasMAIInsts());
+
if (isVerbose()) {
MCSectionELF *CommentSection =
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
@@ -875,6 +884,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
LDSAlignShift = 9;
}
+ ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
+ ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();
+
ProgInfo.LDSSize = MFI->getLDSSize();
ProgInfo.LDSBlocks =
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
@@ -1180,3 +1192,58 @@ void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AMDGPUResourceUsageAnalysis>();
AsmPrinter::getAnalysisUsage(AU);
}
+
+void AMDGPUAsmPrinter::emitResourceUsageRemarks(
+ const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
+ bool isModuleEntryFunction, bool hasMAIInsts) {
+ if (!ORE)
+ return;
+
+ const char *Name = "kernel-resource-usage";
+ const char *Indent = " ";
+
+ // If the remark is not specifically enabled, do not output to YAML.
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
+ return;
+
+ auto EmitResourceUsageRemark = [&](StringRef RemarkName,
+ StringRef RemarkLabel, auto Argument) {
+ // Add an indent for every line besides the line with the kernel name. This
+ // makes it easier to tell which resource usage goes with which kernel since
+ // the kernel name will always be displayed first.
+ std::string LabelStr = RemarkLabel.str() + ": ";
+ if (!RemarkName.equals("FunctionName"))
+ LabelStr = Indent + LabelStr;
+
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(Name, RemarkName,
+ MF.getFunction().getSubprogram(),
+ &MF.front())
+ << LabelStr << ore::NV(RemarkName, Argument);
+ });
+ };
+
+ // FIXME: Formatting here is pretty nasty because clang does not accept
+ // newlines from diagnostics. This forces us to emit multiple diagnostic
+ // remarks to simulate newlines. If and when clang does accept newlines, this
+ // formatting should be aggregated into one remark with newlines to avoid
+ // printing multiple diagnostic locations and diag opts.
+ EmitResourceUsageRemark("FunctionName", "Function Name",
+ MF.getFunction().getName());
+ EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR);
+ EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR);
+ if (hasMAIInsts)
+ EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
+ EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
+ CurrentProgramInfo.ScratchSize);
+ EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
+ CurrentProgramInfo.Occupancy);
+ EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
+ CurrentProgramInfo.SGPRSpill);
+ EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
+ CurrentProgramInfo.VGPRSpill);
+ if (isModuleEntryFunction)
+ EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
+ CurrentProgramInfo.LDSSize);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index ddda2cf107b1..2881b8d7bcca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -69,6 +69,9 @@ private:
uint64_t ScratchSize,
uint64_t CodeSize,
const AMDGPUMachineFunction* MFI);
+ void emitResourceUsageRemarks(const MachineFunction &MF,
+ const SIProgramInfo &CurrentProgramInfo,
+ bool isModuleEntryFunction, bool hasMAIInsts);
uint16_t getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
index 0a2cf3874245..c7a060c5db5b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -27,5 +27,6 @@ AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")
AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x")
AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y")
AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
+AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
#undef AMDGPU_ATTRIBUTE
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 8de0d7e6bff1..a3634d2440c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -72,6 +72,8 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
return WORKGROUP_ID_Z;
+ case Intrinsic::amdgcn_lds_kernel_id:
+ return LDS_KERNEL_ID;
case Intrinsic::amdgcn_dispatch_ptr:
return DISPATCH_PTR;
case Intrinsic::amdgcn_dispatch_id:
@@ -457,6 +459,10 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
removeAssumedBits(QUEUE_PTR);
}
+ if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
+ removeAssumedBits(LDS_KERNEL_ID);
+ }
+
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
@@ -591,6 +597,16 @@ private:
return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
UsedAssumedInformation);
}
+
+ bool funcRetrievesLDSKernelId(Attributor &A) {
+ auto DoesNotRetrieve = [&](Instruction &I) {
+ auto &Call = cast<CallBase>(I);
+ return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
+ };
+ bool UsedAssumedInformation = false;
+ return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
+ UsedAssumedInformation);
+ }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
@@ -743,7 +759,8 @@ public:
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
- &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});
+ &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
+ &AAPointerInfo::ID});
AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index fd812eb676ef..4550cfdcf883 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -764,7 +764,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
AMDGPUFunctionArgInfo::DISPATCH_ID,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
};
static constexpr StringLiteral ImplicitAttrNames[] = {
@@ -774,7 +775,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
"amdgpu-no-dispatch-id",
"amdgpu-no-workgroup-id-x",
"amdgpu-no-workgroup-id-y",
- "amdgpu-no-workgroup-id-z"
+ "amdgpu-no-workgroup-id-z",
+ "amdgpu-no-lds-kernel-id",
};
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -810,6 +812,14 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
} else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
+ } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
+ Optional<uint32_t> Id =
+ AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
+ if (Id.has_value()) {
+ MIRBuilder.buildConstant(InputReg, Id.value());
+ } else {
+ MIRBuilder.buildUndef(InputReg);
+ }
} else {
// We may have proven the input wasn't needed, although the ABI is
// requiring it. We just need to allocate the register appropriately.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 5747fc0ca8e6..229dfb62ef6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -88,6 +88,10 @@ def gi_smrd_sgpr :
GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
GIComplexPatternEquiv<SMRDSgpr>;
+def gi_smrd_sgpr_imm :
+ GIComplexOperandMatcher<s64, "selectSmrdSgprImm">,
+ GIComplexPatternEquiv<SMRDSgprImm>;
+
def gi_flat_offset :
GIComplexOperandMatcher<s64, "selectFlatOffset">,
GIComplexPatternEquiv<FlatOffset>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 6fa44ffcbfaa..632a76b32009 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -875,6 +875,8 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
+ Kern[".uses_dynamic_stack"] =
+ Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
// FIXME: The metadata treats the minimum as 16?
Kern[".kernarg_segment_align"] =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 589992c7a7ec..147c8850587e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -33,7 +33,7 @@
#include "llvm/IR/Dominators.h"
#endif
-#define DEBUG_TYPE "isel"
+#define DEBUG_TYPE "amdgpu-isel"
using namespace llvm;
@@ -1886,21 +1886,21 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
// Match an immediate (if Imm is true) or an SGPR (if Imm is false)
// offset. If Imm32Only is true, match only 32-bit immediate offsets
// available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
- SDValue &Offset, bool Imm,
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
+ SDValue *SOffset, SDValue *Offset,
bool Imm32Only) const {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
if (!C) {
- if (Imm)
+ if (!SOffset)
return false;
if (ByteOffsetNode.getValueType().isScalarInteger() &&
ByteOffsetNode.getValueType().getSizeInBits() == 32) {
- Offset = ByteOffsetNode;
+ *SOffset = ByteOffsetNode;
return true;
}
if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
- Offset = ByteOffsetNode.getOperand(0);
+ *SOffset = ByteOffsetNode.getOperand(0);
return true;
}
}
@@ -1912,8 +1912,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
int64_t ByteOffset = C->getSExtValue();
Optional<int64_t> EncodedOffset =
AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
- if (EncodedOffset && Imm && !Imm32Only) {
- Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+ if (EncodedOffset && Offset && !Imm32Only) {
+ *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
return true;
}
@@ -1922,17 +1922,17 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
return false;
EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
- if (EncodedOffset && Imm32Only) {
- Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+ if (EncodedOffset && Offset && Imm32Only) {
+ *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
return true;
}
if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
return false;
- if (!Imm) {
+ if (SOffset) {
SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
- Offset = SDValue(
+ *SOffset = SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
return true;
}
@@ -1968,11 +1968,18 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
// Match a base and an immediate (if Imm is true) or an SGPR
// (if Imm is false) offset. If Imm32Only is true, match only 32-bit
// immediate offsets available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
- SDValue &Offset, bool Imm,
- bool Imm32Only) const {
+bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
+ SDValue *SOffset, SDValue *Offset,
+ bool Imm32Only) const {
SDLoc SL(Addr);
+ if (SOffset && Offset) {
+ assert(!Imm32Only);
+ SDValue B;
+ return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
+ SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
+ }
+
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
// wraparound, because s_load instructions perform the addition in 64 bits.
if ((Addr.getValueType() != MVT::i32 ||
@@ -1987,34 +1994,55 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
assert(N0 && N1 && isa<ConstantSDNode>(N1));
}
if (N0 && N1) {
- if (SelectSMRDOffset(N1, Offset, Imm, Imm32Only)) {
- SBase = Expand32BitAddress(N0);
+ if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) {
+ SBase = N0;
+ return true;
+ }
+ if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) {
+ SBase = N1;
return true;
}
}
return false;
}
- if (!Imm)
+ if (Offset && !SOffset) {
+ SBase = Addr;
+ *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
+ SDValue *SOffset, SDValue *Offset,
+ bool Imm32Only) const {
+ if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only))
return false;
- SBase = Expand32BitAddress(Addr);
- Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ SBase = Expand32BitAddress(SBase);
return true;
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
- return SelectSMRD(Addr, SBase, Offset, /* Imm */ true);
+ return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
SDValue &Offset) const {
assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
- return SelectSMRD(Addr, SBase, Offset, /* Imm */ true, /* Imm32Only */ true);
+ return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
+ /* Imm32Only */ true);
}
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
- SDValue &Offset) const {
- return SelectSMRD(Addr, SBase, Offset, /* Imm */ false);
+ SDValue &SOffset) const {
+ return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
+ SDValue &SOffset,
+ SDValue &Offset) const {
+ return SelectSMRD(Addr, SBase, &SOffset, &Offset);
}
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 7894b8eb5b67..fda2bfac71fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -193,14 +193,18 @@ private:
bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &SAddr, SDValue &Offset) const;
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool Imm,
- bool Imm32Only) const;
+ bool SelectSMRDOffset(SDValue Base, SDValue ByteOffsetNode, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false) const;
SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool Imm,
- bool Imm32Only = false) const;
+ bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+ SDValue *Offset, bool Imm32Only = false) const;
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
+ bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
+ SDValue &Offset) const;
bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 70fae9d784a2..f2e5c2fe00e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1006,6 +1006,14 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
return selectSMFMACIntrin(I);
default:
return selectImpl(I, *CoverageInfo);
@@ -2361,7 +2369,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
return;
- GEPInfo GEPInfo(*PtrMI);
+ GEPInfo GEPInfo;
for (unsigned i = 1; i != 3; ++i) {
const MachineOperand &GEPOp = PtrMI->getOperand(i);
@@ -3237,6 +3245,8 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
return Register();
+ assert(Def->getNumOperands() == 3 &&
+ MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));
if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
return Def->getOperand(1).getReg();
}
@@ -3354,6 +3364,30 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
break;
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
+ Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
+ Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
+ Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
+ Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
+ Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
+ Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
+ Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
+ break;
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
+ Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
+ break;
default:
llvm_unreachable("unhandled smfmac intrinsic");
}
@@ -3800,25 +3834,82 @@ AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
}};
}
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
+ Register &Base,
+ Register *SOffset,
+ int64_t *Offset) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
+ // then we can select all ptr + 32-bit offsets.
SmallVector<GEPInfo, 4> AddrInfo;
- getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
+ getAddrModeInfo(*MI, *MRI, AddrInfo);
- if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
- return None;
+ if (AddrInfo.empty())
+ return false;
- const GEPInfo &GEPInfo = AddrInfo[0];
+ const GEPInfo &GEPI = AddrInfo[0];
Optional<int64_t> EncodedImm =
- AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm, false);
- if (!EncodedImm)
+ AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false);
+
+ if (SOffset && Offset) {
+ if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
+ AddrInfo.size() > 1) {
+ const GEPInfo &GEPI2 = AddrInfo[1];
+ if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
+ if (Register OffsetReg =
+ matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
+ Base = GEPI2.SgprParts[0];
+ *SOffset = OffsetReg;
+ *Offset = *EncodedImm;
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
+ Base = GEPI.SgprParts[0];
+ *Offset = *EncodedImm;
+ return true;
+ }
+
+ // SGPR offset is unsigned.
+ if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
+ GEPI.Imm != 0) {
+ // If we make it this far we have a load with a 32-bit immediate offset.
+ // It is OK to select this using a sgpr offset, because we have already
+ // failed trying to select this load into one of the _IMM variants since
+ // the _IMM Patterns are considered before the _SGPR patterns.
+ Base = GEPI.SgprParts[0];
+ *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
+ .addImm(GEPI.Imm);
+ return true;
+ }
+
+ if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
+ if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
+ Base = GEPI.SgprParts[0];
+ *SOffset = OffsetReg;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+ Register Base;
+ int64_t Offset;
+ if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset))
return None;
- unsigned PtrReg = GEPInfo.SgprParts[0];
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
- }};
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
InstructionSelector::ComplexRendererFns
@@ -3844,43 +3935,24 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
- MachineInstr *MI = Root.getParent();
- MachineBasicBlock *MBB = MI->getParent();
-
- SmallVector<GEPInfo, 4> AddrInfo;
- getAddrModeInfo(*MI, *MRI, AddrInfo);
-
- // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
- // then we can select all ptr + 32-bit offsets.
- if (AddrInfo.empty())
+ Register Base, SOffset;
+ if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr))
return None;
- const GEPInfo &GEPInfo = AddrInfo[0];
- Register PtrReg = GEPInfo.SgprParts[0];
-
- // SGPR offset is unsigned.
- if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) &&
- GEPInfo.Imm != 0) {
- // If we make it this far we have a load with an 32-bit immediate offset.
- // It is OK to select this using a sgpr offset, because we have already
- // failed trying to select this load into one of the _IMM variants since
- // the _IMM Patterns are considered before the _SGPR patterns.
- Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
- .addImm(GEPInfo.Imm);
- return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
- }
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
+}
- if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) {
- if (Register OffsetReg =
- matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) {
- return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
- [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
- }
- }
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
+ Register Base, SOffset;
+ int64_t Offset;
+ if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
+ return None;
- return None;
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
std::pair<Register, int>
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 22672ba59e76..5baf55d23480 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -63,11 +63,9 @@ public:
private:
struct GEPInfo {
- const MachineInstr &GEP;
SmallVector<unsigned, 2> SgprParts;
SmallVector<unsigned, 2> VgprParts;
- int64_t Imm;
- GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
+ int64_t Imm = 0;
};
bool isSGPR(Register Reg) const;
@@ -200,12 +198,16 @@ private:
InstructionSelector::ComplexRendererFns
selectVINTERPModsHi(MachineOperand &Root) const;
+ bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset,
+ int64_t *Offset) const;
InstructionSelector::ComplexRendererFns
selectSmrdImm(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectSmrdImm32(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectSmrdSgpr(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSmrdSgprImm(MachineOperand &Root) const;
std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root,
uint64_t FlatVariant) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 01a3e78ea48c..0979debe9777 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4197,6 +4197,35 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::getLDSKernelId(Register DstReg,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Function &F = B.getMF().getFunction();
+ Optional<uint32_t> KnownSize =
+ AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
+ if (KnownSize.has_value())
+ B.buildConstant(DstReg, KnownSize.value());
+ return false;
+}
+
+bool AMDGPULegalizerInfo::legalizeLDSKernelId(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ if (!MFI->isEntryFunction()) {
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!getLDSKernelId(DstReg, MRI, B))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
@@ -5636,6 +5665,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_workgroup_id_z:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_lds_kernel_id:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
case Intrinsic::amdgcn_dispatch_ptr:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::DISPATCH_PTR);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index cee533aa34ec..5e8111e22aad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -155,6 +155,13 @@ public:
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+
+ bool getLDSKernelId(Register DstReg, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
+ bool legalizeLDSKernelId(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, unsigned AddrSpace) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 78e092b2e872..7e49a6117ebd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -376,15 +376,7 @@ static bool HasNative(AMDGPULibFunc::EFuncId id) {
return false;
}
-struct TableRef {
- size_t size;
- const TableEntry *table; // variable size: from 0 to (size - 1)
-
- TableRef() : size(0), table(nullptr) {}
-
- template <size_t N>
- TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
-};
+using TableRef = ArrayRef<TableEntry>;
static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
switch(id) {
@@ -698,11 +690,10 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
// Table-Driven optimization
const TableRef tr = getOptTable(FInfo.getId());
- if (tr.size==0)
+ if (tr.empty())
return false;
- int const sz = (int)tr.size;
- const TableEntry * const ftbl = tr.table;
+ int const sz = (int)tr.size();
Value *opr0 = CI->getArgOperand(0);
if (getVecSize(FInfo) > 1) {
@@ -714,8 +705,8 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
assert(eltval && "Non-FP arguments in math function!");
bool found = false;
for (int i=0; i < sz; ++i) {
- if (eltval->isExactlyValue(ftbl[i].input)) {
- DVal.push_back(ftbl[i].result);
+ if (eltval->isExactlyValue(tr[i].input)) {
+ DVal.push_back(tr[i].result);
found = true;
break;
}
@@ -746,8 +737,8 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
// Scalar version
if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
for (int i = 0; i < sz; ++i) {
- if (CF->isExactlyValue(ftbl[i].input)) {
- Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
+ if (CF->isExactlyValue(tr[i].input)) {
+ Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
replaceCall(nval);
return true;
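The TableRef change above swaps a hand-written {size, pointer} pair for a non-owning view, so callers can use empty(), size() and indexing directly. A minimal sketch of the same pattern, using std::span as a stand-in for llvm::ArrayRef (illustrative only; the table contents here are made up):

    // Sketch of replacing a {size, pointer} struct with a non-owning view
    // (assumption: std::span stands in for llvm::ArrayRef).
    #include <cstdio>
    #include <span>

    struct TableEntry {
      double input;
      double result;
    };

    static constexpr TableEntry PowersOfTwo[] = {{1.0, 2.0}, {2.0, 4.0}, {3.0, 8.0}};

    using TableRef = std::span<const TableEntry>;

    // Returns the table for a given function id, or an empty view if none exists.
    static TableRef getOptTable(int id) {
      if (id == 0)
        return PowersOfTwo;
      return {};
    }

    int main() {
      TableRef tr = getOptTable(0);
      if (tr.empty())
        return 1;
      for (const TableEntry &e : tr)
        std::printf("%g -> %g\n", e.input, e.result);
      return 0;
    }

The view carries its own length, so the separate size field and raw pointer arithmetic in the old struct become unnecessary.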
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 35922341de26..b4a8766d682e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -55,21 +55,6 @@ static cl::opt<bool> SuperAlignLDSGlobals(
cl::init(true), cl::Hidden);
namespace {
-
-SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
- SmallPtrSet<GlobalValue *, 32> UsedList;
-
- SmallVector<GlobalValue *, 32> TmpVec;
- collectUsedGlobalVariables(M, TmpVec, true);
- UsedList.insert(TmpVec.begin(), TmpVec.end());
-
- TmpVec.clear();
- collectUsedGlobalVariables(M, TmpVec, false);
- UsedList.insert(TmpVec.begin(), TmpVec.end());
-
- return UsedList;
-}
-
class AMDGPULowerModuleLDS : public ModulePass {
static void removeFromUsedList(Module &M, StringRef Name,
@@ -153,9 +138,6 @@ class AMDGPULowerModuleLDS : public ModulePass {
"");
}
-private:
- SmallPtrSet<GlobalValue *, 32> UsedList;
-
public:
static char ID;
@@ -165,9 +147,10 @@ public:
bool runOnModule(Module &M) override {
CallGraph CG = CallGraph(M);
- UsedList = getUsedList(M);
bool Changed = superAlignLDSGlobals(M);
- Changed |= processUsedLDS(CG, M);
+ std::vector<GlobalVariable *> ModuleScopeVariables =
+ AMDGPU::findVariablesToLower(M, nullptr);
+ Changed |= processUsedLDS(CG, M, ModuleScopeVariables);
for (Function &F : M.functions()) {
if (F.isDeclaration())
@@ -176,10 +159,11 @@ public:
// Only lower compute kernels' LDS.
if (!AMDGPU::isKernel(F.getCallingConv()))
continue;
- Changed |= processUsedLDS(CG, M, &F);
+ std::vector<GlobalVariable *> KernelUsedVariables =
+ AMDGPU::findVariablesToLower(M, &F);
+ Changed |= processUsedLDS(CG, M, KernelUsedVariables, &F);
}
- UsedList.clear();
return Changed;
}
@@ -228,22 +212,20 @@ private:
return Changed;
}
- bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) {
+ bool processUsedLDS(CallGraph const &CG, Module &M,
+ std::vector<GlobalVariable *> const &LDSVarsToTransform,
+ Function *F = nullptr) {
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
- // Find variables to move into new struct instance
- std::vector<GlobalVariable *> FoundLocalVars =
- AMDGPU::findVariablesToLower(M, F);
-
- if (FoundLocalVars.empty()) {
+ if (LDSVarsToTransform.empty()) {
// No variables to rewrite, no changes made.
return false;
}
SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
- LayoutFields.reserve(FoundLocalVars.size());
- for (GlobalVariable *GV : FoundLocalVars) {
+ LayoutFields.reserve(LDSVarsToTransform.size());
+ for (GlobalVariable *GV : LDSVarsToTransform) {
OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
AMDGPU::getAlign(DL, GV));
LayoutFields.emplace_back(F);
@@ -252,7 +234,7 @@ private:
performOptimizedStructLayout(LayoutFields);
std::vector<GlobalVariable *> LocalVars;
- LocalVars.reserve(FoundLocalVars.size()); // will be at least this large
+ LocalVars.reserve(LDSVarsToTransform.size()); // will be at least this large
{
// This usually won't need to insert any padding, perhaps avoid the alloc
uint64_t CurrentOffset = 0;
@@ -352,7 +334,6 @@ private:
GV->replaceAllUsesWith(GEP);
}
if (GV->use_empty()) {
- UsedList.erase(GV);
GV->eraseFromParent();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index b461c3c4bfdc..f5e12fd960d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -11,6 +11,7 @@
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -101,6 +102,21 @@ void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) {
}
}
+Optional<uint32_t>
+AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
+ auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+ if (MD && MD->getNumOperands() == 1) {
+ ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ if (KnownSize) {
+ uint64_t V = KnownSize->getZExtValue();
+ if (V <= UINT32_MAX) {
+ return V;
+ }
+ }
+ }
+ return {};
+}
+
void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
const GlobalVariable &GV) {
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index df62c2314617..97db8b7eb8d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -11,11 +11,12 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Function.h"
namespace llvm {
@@ -104,6 +105,8 @@ public:
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
void allocateModuleLDSGlobal(const Function &F);
+ static Optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
+
Align getDynLDSAlign() const { return DynLDSAlign; }
void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 09dbd2150db6..a9f1e9bd0996 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -74,10 +74,10 @@ public:
private:
struct MemAccessInfo {
- const Value *V;
- const Value *Base;
- int64_t Offset;
- MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {}
+ const Value *V = nullptr;
+ const Value *Base = nullptr;
+ int64_t Offset = 0;
+ MemAccessInfo() = default;
bool isLargeStride(MemAccessInfo &Reference) const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Printable print() const {
@@ -116,6 +116,7 @@ private:
bool isGlobalAddr(const Value *V) const;
bool isLocalAddr(const Value *V) const;
+ bool isGlobalLoadUsedInBB(const Instruction &) const;
};
static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType(
@@ -196,6 +197,24 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
return false;
}
+// Returns true if the global load `I` is used in its own basic block.
+bool AMDGPUPerfHint::isGlobalLoadUsedInBB(const Instruction &I) const {
+ const auto *Ld = dyn_cast<LoadInst>(&I);
+ if (!Ld)
+ return false;
+ if (!isGlobalAddr(Ld->getPointerOperand()))
+ return false;
+
+ for (const User *Usr : Ld->users()) {
+ if (const Instruction *UsrInst = dyn_cast<Instruction>(Usr)) {
+ if (UsrInst->getParent() == I.getParent())
+ return true;
+ }
+ }
+
+ return false;
+}
+
AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
@@ -203,9 +222,14 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
for (auto &B : F) {
LastAccess = MemAccessInfo();
+ unsigned UsedGlobalLoadsInBB = 0;
for (auto &I : B) {
if (const Type *Ty = getMemoryInstrPtrAndType(&I).second) {
unsigned Size = divideCeil(Ty->getPrimitiveSizeInBits(), 32);
+ // TODO: Check if the global load and its user are close to each other
+ // instead (Or do this analysis in GCNSchedStrategy?).
+ if (isGlobalLoadUsedInBB(I))
+ UsedGlobalLoadsInBB += Size;
if (isIndirectAccess(&I))
FI.IAMInstCost += Size;
if (isLargeStride(&I))
@@ -245,6 +269,16 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
++FI.InstCost;
}
}
+
+ if (!FI.HasDenseGlobalMemAcc) {
+ unsigned GlobalMemAccPercentage = UsedGlobalLoadsInBB * 100 / B.size();
+ if (GlobalMemAccPercentage > 50) {
+ LLVM_DEBUG(dbgs() << "[HasDenseGlobalMemAcc] Set to true since "
+ << B.getName() << " has " << GlobalMemAccPercentage
+ << "% global memory access\n");
+ FI.HasDenseGlobalMemAcc = true;
+ }
+ }
}
return &FI;
@@ -286,6 +320,11 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) {
}
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
+ // Reverting optimal scheduling in favour of occupancy with basic block(s)
+ // having dense global memory access can potentially hurt performance.
+ if (FI.HasDenseGlobalMemAcc)
+ return true;
+
return FI.MemInstCost * 100 / FI.InstCost > MemBoundThresh;
}
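The new HasDenseGlobalMemAcc path above marks a function as memory bound when, in some basic block, size-weighted global loads whose results are consumed in the same block account for more than half of the block's instructions. A minimal standalone sketch of that heuristic (illustrative only, not the LLVM implementation; the 50% threshold is taken from the patch):

    // Sketch of the "dense global memory access" heuristic (assumption:
    // standalone illustration with made-up block statistics).
    #include <cstdio>

    struct BlockStats {
      unsigned NumInsts = 0;            // Instructions in the block.
      unsigned UsedGlobalLoadUnits = 0; // Size-weighted global loads used in-block.
    };

    static bool hasDenseGlobalMemAcc(const BlockStats &B) {
      if (B.NumInsts == 0)
        return false;
      unsigned Pct = B.UsedGlobalLoadUnits * 100 / B.NumInsts;
      return Pct > 50; // Same threshold as the patch.
    }

    int main() {
      BlockStats Dense{10, 6}, Sparse{10, 2};
      std::printf("%d %d\n", hasDenseGlobalMemAcc(Dense), hasDenseGlobalMemAcc(Sparse));
      return 0;
    }

Once a single block trips the threshold, isMemBound returns true regardless of the existing MemInstCost ratio, which is exactly the early return added above.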
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index 31ff80f5f431..2db8db6957ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -41,7 +41,11 @@ public:
unsigned InstCost;
unsigned IAMInstCost; // Indirect access memory instruction count
unsigned LSMInstCost; // Large stride memory instruction count
- FuncInfo() : MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0) {}
+ bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively
+ // high global memory access
+ FuncInfo()
+ : MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0),
+ HasDenseGlobalMemAcc(false) {}
};
typedef ValueMap<const Function*, FuncInfo> FuncInfoMap;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 0df6f4d45b06..bd8e568213b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -153,7 +153,10 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
if (!isVgprRegBank(Dst))
return false;
- if (MRI.getType(Dst).isVector())
+ // med3 for i16 is only available on gfx9+, and not available for v2i16.
+ LLT Ty = MRI.getType(Dst);
+ if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
+ Ty != LLT::scalar(32))
return false;
MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 0830cbd919a0..887341e67454 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4426,7 +4426,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
- case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: {
+ case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
+ case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
+ case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
+ case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
+ case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
+ case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
+ case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
+ case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
+ case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: {
// Default for MAI intrinsics.
// srcC can also be an immediate which can be folded later.
// FIXME: Should we eventually add an alternative mapping with AGPR src
@@ -4451,7 +4459,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
- case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: {
+ case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
+ case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: {
// vdst, srcA, srcB, srcC, idx
OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
index 4d7a3f4028e8..aa51c5d20bdc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@@ -141,7 +141,7 @@ class ReplaceLDSUseImpl {
std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() {
// Collect LDS which requires module lowering.
std::vector<GlobalVariable *> LDSGlobals =
- llvm::AMDGPU::findVariablesToLower(M);
+ llvm::AMDGPU::findVariablesToLower(M, nullptr);
// Remove LDS which don't qualify for replacement.
llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 8297635d7bb2..5d7bade00a3e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -340,12 +340,28 @@ def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x32_i8>;
def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x16_i8>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8_xf32>;
def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4_xf32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_bf8_fp8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_fp8_bf8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_fp8_fp8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_bf8_bf8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_f16>;
def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_f16>;
def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_bf16>;
def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_bf16>;
def : SourceOfDivergence<int_amdgcn_smfmac_i32_16x16x64_i8>;
def : SourceOfDivergence<int_amdgcn_smfmac_i32_32x32x32_i8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>;
+def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>;
// The dummy boolean output is divergent from the IR's perspective,
// but the mask results are uniform. These produce a divergent and
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 6bd906439ee8..cf4826d81b4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -736,13 +736,18 @@ static unsigned getMaxNumPreloadedSGPRs() {
2 + // dispatch ID
2 + // flat scratch init
2; // Implicit buffer ptr
+
// Max number of system SGPRs
unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
1 + // WorkGroupIDY
1 + // WorkGroupIDZ
1 + // WorkGroupInfo
1; // private segment wave byte offset
- return MaxUserSGPRs + MaxSystemSGPRs;
+
+ // Max number of synthetic SGPRs
+ unsigned SyntheticSGPRs = 1; // LDSKernelId
+
+ return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}
unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
@@ -852,34 +857,6 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
return MI && TII->isVALU(*MI);
}
- bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
- if (Pred->NodeNum < Succ->NodeNum)
- return true;
-
- SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred});
-
- for (unsigned I = 0; I < Succs.size(); ++I) {
- for (const SDep &SI : Succs[I]->Succs) {
- const SUnit *SU = SI.getSUnit();
- if (SU != Succs[I] && !llvm::is_contained(Succs, SU))
- Succs.push_back(SU);
- }
- }
-
- SmallPtrSet<const SUnit*, 32> Visited;
- while (!Preds.empty()) {
- const SUnit *SU = Preds.pop_back_val();
- if (llvm::is_contained(Succs, SU))
- return false;
- Visited.insert(SU);
- for (const SDep &SI : SU->Preds)
- if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit()))
- Preds.push_back(SI.getSUnit());
- }
-
- return true;
- }
-
// Link as many SALU instructions in chain as possible. Return the size
// of the chain. Links up to MaxChain instructions.
unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
@@ -895,18 +872,20 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From);
dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');
- if (SU->addPred(SDep(From, SDep::Artificial), false))
- ++Linked;
+ if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From))
+ if (DAG->addEdge(SU, SDep(From, SDep::Artificial)))
+ ++Linked;
for (SDep &SI : From->Succs) {
SUnit *SUv = SI.getSUnit();
- if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
- SUv->addPred(SDep(SU, SDep::Artificial), false);
+ if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) &&
+ DAG->canAddEdge(SUv, SU))
+ DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
}
for (SDep &SI : SU->Succs) {
SUnit *Succ = SI.getSUnit();
- if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ))
+ if (Succ != SU && isSALU(Succ))
Worklist.push_back(Succ);
}
}
@@ -949,7 +928,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
if (Visited.count(&*LastSALU))
continue;
- if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU))
+ if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) ||
+ !DAG->canAddEdge(&*LastSALU, &SU))
continue;
Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 971e44723758..dca926867300 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1584,6 +1584,9 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
AMDGPU::SGPR_32RegClass,
MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,
+ AMDGPU::SGPR_32RegClass,
+ MFI->ArgInfo.LDSKernelId, 0, 1) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
0, 1) ||
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index e12d0ffef35c..2a9393fc1595 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1739,6 +1739,8 @@ public:
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
@@ -1767,21 +1769,11 @@ public:
void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
cvtDPP(Inst, Operands, true);
}
- void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
- bool IsDPP8 = false);
- void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
- cvtVOPCNoDstDPP(Inst, Operands, true);
- }
void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
bool IsDPP8 = false);
void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
cvtVOP3DPP(Inst, Operands, true);
}
- void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
- bool IsDPP8 = false);
- void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
- cvtVOPC64NoDstDPP(Inst, Operands, true);
- }
OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type);
@@ -4177,7 +4169,9 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
return false;
}
- if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
+ uint64_t TSFlags = MII.get(Opc).TSFlags;
+
+ if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
if (OpSelIdx != -1) {
if (Inst.getOperand(OpSelIdx).getImm() != 0)
@@ -4190,6 +4184,15 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
}
}
+ // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
+ if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
+ !(TSFlags & SIInstrFlags::VOP3P)) {
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+ if (OpSel & 3)
+ return false;
+ }
+
return true;
}
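The added clause rejects any non-zero value in op_sel[0:1] for VOP3 (non-VOP3P) dot instructions. A rough standalone restatement of just that predicate, with the TSFlags tests reduced to plain booleans (the helper below is illustrative, not part of the parser):

#include <cstdint>

// For a VOP3 (non-VOP3P) dot instruction such as v_dot2_bf16_bf16,
// op_sel bits 0 and 1 must both be clear.
static bool isValidVOP3DotOpSel(uint64_t OpSelImm, bool IsDot, bool IsVOP3,
                                bool IsVOP3P) {
  if (IsDot && IsVOP3 && !IsVOP3P)
    return (OpSelImm & 3) == 0;
  return true; // the constraint only applies to VOP3 dot encodings
}

// e.g. isValidVOP3DotOpSel(/*OpSelImm=*/1, true, true, false) == false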
@@ -4636,9 +4639,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
Error(IDLoc, "ABS not allowed in VOP3B instructions");
return false;
}
- if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
- return false;
- }
if (!validateExeczVcczOperands(Operands)) {
return false;
}
@@ -5004,6 +5004,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
Val, ValRange);
+ } else if (ID == ".amdhsa_uses_dynamic_stack") {
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
@@ -8024,10 +8027,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
return MatchOperand_NoMatch;
}
-void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
- cvtVOP3P(Inst, Operands);
-
+// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
+// the number of src operands present, then copies that bit into src0_modifiers.
+void cvtVOP3DstOpSelOnly(MCInst &Inst) {
int Opc = Inst.getOpcode();
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ if (OpSelIdx == -1)
+ return;
int SrcNum;
const int Ops[] = { AMDGPU::OpName::src0,
@@ -8038,7 +8044,6 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands)
++SrcNum);
assert(SrcNum > 0);
- int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
if ((OpSel & (1 << SrcNum)) != 0) {
@@ -8048,6 +8053,18 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands)
}
}
+void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
+ const OperandVector &Operands) {
+ cvtVOP3P(Inst, Operands);
+ cvtVOP3DstOpSelOnly(Inst);
+}
+
+void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx) {
+ cvtVOP3P(Inst, Operands, OptionalIdx);
+ cvtVOP3DstOpSelOnly(Inst);
+}
+
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
// 1. This operand is input modifiers
return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
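cvtVOP3DstOpSelOnly above locates the DST_OP_SEL bit by counting the source operands that are present; a minimal sketch of that indexing, detached from the MC layer (the helper below is hypothetical):

// For VOP3 op_sel, source operand J uses bit J of the op_sel immediate and
// the destination uses the bit just past the last present source, so with
// three sources the destination half-selection sits in bit 3.
static bool dstOpSelBitSet(unsigned OpSelImm, unsigned NumSrcOperands) {
  return (OpSelImm & (1u << NumSrcOperands)) != 0;
}

// Example: op_sel = 0b1000 on a three-source VOP3 instruction selects only
// the destination half; the parser then mirrors that bit into src0_modifiers.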
@@ -8241,6 +8258,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
+ if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
+ Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
+ Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
+ Inst.addOperand(Inst.getOperand(0));
+ }
+
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
assert(!IsPacked);
Inst.addOperand(Inst.getOperand(0));
@@ -8747,14 +8770,6 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
-// Add dummy $old operand
-void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst,
- const OperandVector &Operands,
- bool IsDPP8) {
- Inst.addOperand(MCOperand::createReg(0));
- cvtVOP3DPP(Inst, Operands, IsDPP8);
-}
-
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
OptionalImmIndexMap OptionalIdx;
unsigned Opc = Inst.getOpcode();
@@ -8802,6 +8817,8 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bo
}
if (Desc.TSFlags & SIInstrFlags::VOP3P)
cvtVOP3P(Inst, Operands, OptionalIdx);
+ else if (Desc.TSFlags & SIInstrFlags::VOP3)
+ cvtVOP3OpSel(Inst, Operands, OptionalIdx);
else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
}
@@ -8821,14 +8838,6 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bo
}
}
-// Add dummy $old operand
-void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst,
- const OperandVector &Operands,
- bool IsDPP8) {
- Inst.addOperand(MCOperand::createReg(0));
- cvtDPP(Inst, Operands, IsDPP8);
-}
-
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
OptionalImmIndexMap OptionalIdx;
@@ -9043,12 +9052,27 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
// v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::clamp) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyClampSI, 0);
+ }
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::omod) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyOModSI, 0);
+ }
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::dst_sel) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+ }
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::dst_unused) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTySdwaDstUnused,
+ DstUnused::UNUSED_PRESERVE);
}
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
break;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index ccaf646008b1..98ee720200b4 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -451,7 +451,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
convertVOP3PDPPInst(MI);
else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
- convertVOPCDPPInst(MI);
+ convertVOPCDPPInst(MI); // Special VOP3 case
+ else {
+ assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
+ convertVOP3DPPInst(MI); // Regular VOP3 case
+ }
break;
}
Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
@@ -745,6 +749,43 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
return MCDisassembler::Success;
}
+struct VOPModifiers {
+ unsigned OpSel = 0;
+ unsigned OpSelHi = 0;
+ unsigned NegLo = 0;
+ unsigned NegHi = 0;
+};
+
+// Reconstruct values of VOP3/VOP3P operands such as op_sel.
+// Note that these values do not affect disassembler output,
+// so this is only necessary for consistency with src_modifiers.
+static VOPModifiers collectVOPModifiers(const MCInst &MI,
+ bool IsVOP3P = false) {
+ VOPModifiers Modifiers;
+ unsigned Opc = MI.getOpcode();
+ const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers};
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+ if (OpIdx == -1)
+ continue;
+
+ unsigned Val = MI.getOperand(OpIdx).getImm();
+
+ Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
+ if (IsVOP3P) {
+ Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
+ Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
+ Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
+ } else if (J == 0) {
+ Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
+ }
+ }
+
+ return Modifiers;
+}
+
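collectVOPModifiers packs per-source modifier bits back into the op_sel/op_sel_hi/neg_lo/neg_hi immediates. A small sketch of that packing for three sources; the SrcModBits values below are illustrative placeholders, not the real SISrcMods constants:

#include <array>

// Illustrative modifier bits; the real layout lives in SISrcMods and may differ.
enum SrcModBits : unsigned {
  MOD_NEG = 1u << 0,
  MOD_NEG_HI = 1u << 1,
  MOD_OP_SEL_0 = 1u << 2,
  MOD_OP_SEL_1 = 1u << 3,
  MOD_DST_OP_SEL = 1u << 4,
};

struct PackedMods {
  unsigned OpSel = 0, OpSelHi = 0, NegLo = 0, NegHi = 0;
};

// Mirrors the loop above: bit J of each packed field comes from source J's
// modifier word; for plain VOP3 only op_sel is rebuilt, and the destination
// bit (bit 3) is taken from src0's DST_OP_SEL flag.
static PackedMods packFromSrcMods(const std::array<unsigned, 3> &SrcMods,
                                  bool IsVOP3P) {
  PackedMods M;
  for (unsigned J = 0; J < SrcMods.size(); ++J) {
    unsigned Val = SrcMods[J];
    M.OpSel |= ((Val & MOD_OP_SEL_0) != 0) << J;
    if (IsVOP3P) {
      M.OpSelHi |= ((Val & MOD_OP_SEL_1) != 0) << J;
      M.NegLo |= ((Val & MOD_NEG) != 0) << J;
      M.NegHi |= ((Val & MOD_NEG_HI) != 0) << J;
    } else if (J == 0) {
      M.OpSel |= ((Val & MOD_DST_OP_SEL) != 0) << 3;
    }
  }
  return M;
}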
// We must check FI == literal to reject insts that are not genuine dpp8, and we
// must first add optional MI operands to check FI
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
@@ -755,6 +796,11 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
} else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
AMDGPU::isVOPC64DPP(Opc)) {
convertVOPCDPPInst(MI);
+ } else if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
+ auto Mods = collectVOPModifiers(MI);
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
+ AMDGPU::OpName::op_sel);
} else {
// Insert dummy unused src modifiers.
if (MI.getNumOperands() < DescNumOps &&
@@ -770,6 +816,18 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}
+DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
+ unsigned Opc = MI.getOpcode();
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
+ auto Mods = collectVOPModifiers(MI);
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
+ AMDGPU::OpName::op_sel);
+ }
+ return MCDisassembler::Success;
+}
+
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
@@ -914,45 +972,27 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
unsigned Opc = MI.getOpcode();
unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+ auto Mods = collectVOPModifiers(MI, true);
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1)
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
- const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
- AMDGPU::OpName::src1_modifiers,
- AMDGPU::OpName::src2_modifiers};
- unsigned OpSel = 0;
- unsigned OpSelHi = 0;
- unsigned NegLo = 0;
- unsigned NegHi = 0;
- for (int J = 0; J < 3; ++J) {
- int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
- if (OpIdx == -1)
- break;
- unsigned Val = MI.getOperand(OpIdx).getImm();
-
- OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
- OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
- NegLo |= !!(Val & SISrcMods::NEG) << J;
- NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
- }
-
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(OpSel),
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
AMDGPU::OpName::op_sel);
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(OpSelHi),
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
AMDGPU::OpName::op_sel_hi);
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(NegLo),
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
AMDGPU::OpName::neg_lo);
if (MI.getNumOperands() < DescNumOps &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi) != -1)
- insertNamedMCOperand(MI, MCOperand::createImm(NegHi),
+ insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
AMDGPU::OpName::neg_hi);
return MCDisassembler::Success;
@@ -2000,6 +2040,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
}
+ PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
+ KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
+
if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 31869f0917ae..d17e2d8d5082 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -162,6 +162,7 @@ public:
DecodeStatus convertSDWAInst(MCInst &MI) const;
DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
+ DecodeStatus convertVOP3DPPInst(MCInst &MI) const;
DecodeStatus convertVOP3PDPPInst(MCInst &MI) const;
DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 5d254518c67a..4558ddf6dbfe 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -202,6 +202,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
return nullptr;
}
+ int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
+ // Prior checks cover Mask with VOPC condition, but not on purpose
+ auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
+ assert(RowMaskOpnd && RowMaskOpnd->isImm());
+ auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
+ assert(BankMaskOpnd && BankMaskOpnd->isImm());
+ const bool MaskAllLanes =
+ RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
+ (void)MaskAllLanes;
+ assert(MaskAllLanes ||
+ !(TII->isVOPC(DPPOp) ||
+ (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
+ "VOPC cannot form DPP unless mask is full");
auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
OrigMI.getDebugLoc(), TII->get(DPPOp))
@@ -234,6 +247,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
CombOldVGPR.SubReg);
++NumOperands;
+ } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
+ TII->isVOPC(OrigOpE32))) {
+ // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand
+ // because they write to SGPRs not VGPRs
} else {
// TODO: this discards MAC/FMA instructions for now, let's add it later
LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 281474994bca..6ff349e31f22 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -249,11 +249,11 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel,
//===----------------------------------------------------------------------===//
def : ProcessorModel<"gfx1100", GFX11SpeedModel,
- FeatureISAVersion11_0.Features
+ FeatureISAVersion11_0_0.Features
>;
def : ProcessorModel<"gfx1101", GFX11SpeedModel,
- FeatureISAVersion11_0.Features
+ FeatureISAVersion11_0_1.Features
>;
def : ProcessorModel<"gfx1102", GFX11SpeedModel,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 100410bb7644..04da14cc4916 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -733,7 +733,7 @@ void GCNScheduleDAGMILive::collectRematerializableInstructions() {
MachineOperand *Op = MRI.getOneDef(Reg);
MachineInstr *Def = Op->getParent();
- if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def, AA))
+ if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
continue;
MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg);
@@ -943,9 +943,8 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
}
// Copied from MachineLICM
-bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) {
- if (!TII->isTriviallyReMaterializable(MI, AA))
+bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI) {
+ if (!TII->isTriviallyReMaterializable(MI))
return false;
for (const MachineOperand &MO : MI.operands())
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 97f94f69b70e..c3db849cf81a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -142,7 +142,7 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// and single use outside the defining block into RematerializableInsts.
void collectRematerializableInstructions();
- bool isTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA);
+ bool isTriviallyReMaterializable(const MachineInstr &MI);
// TODO: Should also attempt to reduce RP of SGPRs and AGPRs
// Attempt to reduce RP of VGPR by sinking trivially rematerializable
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index d269d0945f3b..d71f80c5f458 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -145,6 +145,7 @@ protected:
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasMAIInsts = false;
+ bool HasFP8Insts = false;
bool HasPkFmacF16Inst = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
@@ -721,6 +722,10 @@ public:
return HasMAIInsts;
}
+ bool hasFP8Insts() const {
+ return HasFP8Insts;
+ }
+
bool hasPkFmacF16Inst() const {
return HasPkFmacF16Inst;
}
@@ -930,7 +935,7 @@ public:
}
bool hasUserSGPRInit16Bug() const {
- return UserSGPRInit16Bug;
+ return UserSGPRInit16Bug && isWave32();
}
bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index bd938d829953..21ff2744e5b4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -627,7 +627,7 @@ void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
unsigned OpNo) const {
- return OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) &&
+ return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) &&
(Desc.TSFlags & SIInstrFlags::VOPC) &&
(Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO));
@@ -644,8 +644,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// If there are printed modifiers, printOperandAndFPInputMods or
// printOperandAndIntInputMods will be called instead
if ((OpNo == 0 ||
- (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP)) ||
- (OpNo == 2 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
+ (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
(Desc.TSFlags & SIInstrFlags::VOPC) &&
(Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 078133469549..0e71509cf2bd 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -367,6 +367,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
PRINT_FIELD(OS,
(hasArchitectedFlatScratch(STI)
? ".amdhsa_enable_private_segment"
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index f54778535b7c..3e95c55df57e 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -67,6 +67,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
@@ -81,9 +82,9 @@ static cl::opt<bool> EnableM0Merge(
cl::init(true));
namespace {
-
class SIFixSGPRCopies : public MachineFunctionPass {
MachineDominatorTree *MDT;
+ unsigned NextVGPRToSGPRCopyID;
public:
static char ID;
@@ -92,9 +93,16 @@ public:
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
- SIFixSGPRCopies() : MachineFunctionPass(ID) {}
+ SIFixSGPRCopies() : MachineFunctionPass(ID), NextVGPRToSGPRCopyID(0) {}
bool runOnMachineFunction(MachineFunction &MF) override;
+ unsigned getNextVGPRToSGPRCopyId() { return ++NextVGPRToSGPRCopyID; }
+ void lowerVGPR2SGPRCopies(MachineFunction &MF);
+ // Handles copies whose source register is:
+ // 1. A physical register
+ // 2. An AGPR
+ // 3. Defined by an instruction that merely moves an immediate
+ bool lowerSpecialCase(MachineInstr &MI);
MachineBasicBlock *processPHINode(MachineInstr &MI);
@@ -569,6 +577,14 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
MDT = &getAnalysis<MachineDominatorTree>();
+ // We have to lower VGPR to SGPR copies before the main loop
+ // because the REG_SEQUENCE and PHI lowering in the main loop
+ // converts the def-use chains to VALU and closes the opportunities
+ // for keeping them scalar.
+ // TODO: REG_SEQUENCE and PHIs are semantically copies. The next patch
+ // will address their lowering and unify the processing in one main loop.
+ lowerVGPR2SGPRCopies(MF);
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock *MBB = &*BI;
@@ -640,42 +656,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
- Register SrcReg = MI.getOperand(1).getReg();
- if (!SrcReg.isVirtual()) {
- MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
- if (NewBB && NewBB != MBB) {
- MBB = NewBB;
- E = MBB->end();
- BI = MachineFunction::iterator(MBB);
- BE = MF.end();
- }
- assert((!NewBB || NewBB == I->getParent()) &&
- "moveToVALU did not return the right basic block");
- break;
- }
-
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- unsigned SMovOp;
- int64_t Imm;
- // If we are just copying an immediate, we can replace the copy with
- // s_mov_b32.
- if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
- MI.getOperand(1).ChangeToImmediate(Imm);
- MI.addImplicitDefUseOperands(MF);
- MI.setDesc(TII->get(SMovOp));
- break;
- }
- MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
- if (NewBB && NewBB != MBB) {
- MBB = NewBB;
- E = MBB->end();
- BI = MachineFunction::iterator(MBB);
- BE = MF.end();
- }
- assert((!NewBB || NewBB == I->getParent()) &&
- "moveToVALU did not return the right basic block");
- } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
+ if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
}
@@ -916,3 +897,269 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}
return CreatedBB;
}
+
+bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI) {
+ MachineBasicBlock *MBB = MI.getParent();
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
+
+ // We return true to indicate that no further processing is needed
+ if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI))
+ return true;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) {
+ TII->moveToVALU(MI, MDT);
+ return true;
+ }
+
+ unsigned SMovOp;
+ int64_t Imm;
+ // If we are just copying an immediate, we can replace the copy with
+ // s_mov_b32.
+ if (isSafeToFoldImmIntoCopy(&MI, MRI->getVRegDef(SrcReg), TII, SMovOp, Imm)) {
+ MI.getOperand(1).ChangeToImmediate(Imm);
+ MI.addImplicitDefUseOperands(*MBB->getParent());
+ MI.setDesc(TII->get(SMovOp));
+ return true;
+ }
+ return false;
+}
+
+class V2SCopyInfo {
+public:
+ // VGPR to SGPR copy being processed
+ MachineInstr *Copy;
+ // All SALU instructions reachable from this copy in SSA graph
+ DenseSet<MachineInstr *> SChain;
+ // Number of SGPR to VGPR copies that are used to put the SALU computation
+ // results back to VALU.
+ unsigned NumSVCopies;
+
+ unsigned Score;
+ // Actual count of v_readfirstlane_b32
+ // which need to be inserted to keep SChain SALU
+ unsigned NumReadfirstlanes;
+ // Current score state. Used to speed up selecting V2SCopyInfos for processing.
+ bool NeedToBeConvertedToVALU = false;
+ // Unique ID. Used as a key for mapping to keep permanent order.
+ unsigned ID;
+
+ // Count of other VGPR to SGPR copies that contribute to the
+ // current copy's SChain
+ unsigned SiblingPenalty = 0;
+ SetVector<unsigned> Siblings;
+ V2SCopyInfo() : Copy(nullptr), ID(0){};
+ V2SCopyInfo(unsigned Id, MachineInstr *C, unsigned Width)
+ : Copy(C), NumSVCopies(0), NumReadfirstlanes(Width / 32), ID(Id){};
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ dbgs() << ID << " : " << *Copy << "\n\tS:" << SChain.size()
+ << "\n\tSV:" << NumSVCopies << "\n\tSP: " << SiblingPenalty
+ << "\nScore: " << Score << "\n";
+ }
+#endif
+};
+
+void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
+
+ DenseMap<unsigned, V2SCopyInfo> Copies;
+ DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;
+
+ // The main function that computes the VGPR to SGPR copy score
+ // and decides how the copy is lowered further: v_readfirstlane_b32 or moveToVALU
+ auto needToBeConvertedToVALU = [&](V2SCopyInfo *I) -> bool {
+ if (I->SChain.empty())
+ return true;
+ I->Siblings = SiblingPenalty[*std::max_element(
+ I->SChain.begin(), I->SChain.end(),
+ [&](MachineInstr *A, MachineInstr *B) -> bool {
+ return SiblingPenalty[A].size() < SiblingPenalty[B].size();
+ })];
+ I->Siblings.remove_if([&](unsigned ID) { return ID == I->ID; });
+ // The loop below computes the number of other VGPR to SGPR copies
+ // which contribute to the current copy's SALU chain. We assume that all the
+ // copies with the same source virtual register will be squashed to one by
+ // regalloc. We also take care of copies of different subregs of the
+ // same register.
+ SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
+ for (auto J : I->Siblings) {
+ auto InfoIt = Copies.find(J);
+ if (InfoIt != Copies.end()) {
+ MachineInstr *SiblingCopy = InfoIt->getSecond().Copy;
+ if (SiblingCopy->isImplicitDef())
+ // the COPY has already been MoveToVALUed
+ continue;
+
+ SrcRegs.insert(std::make_pair(SiblingCopy->getOperand(1).getReg(),
+ SiblingCopy->getOperand(1).getSubReg()));
+ }
+ }
+ I->SiblingPenalty = SrcRegs.size();
+
+ unsigned Penalty =
+ I->NumSVCopies + I->SiblingPenalty + I->NumReadfirstlanes;
+ unsigned Profit = I->SChain.size();
+ I->Score = Penalty > Profit ? 0 : Profit - Penalty;
+ I->NeedToBeConvertedToVALU = I->Score < 3;
+ return I->NeedToBeConvertedToVALU;
+ };
+
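The heuristic above trades the size of the SALU chain that stays scalar against the cost of keeping the copy: copies back to VALU, sibling copies of the same source, and the v_readfirstlane_b32 instructions themselves. A standalone restatement of that arithmetic with the same hard-coded threshold of 3 (the empty-SChain early-out is handled separately above):

// Score = max(0, |SChain| - (NumSVCopies + SiblingPenalty + NumReadfirstlanes));
// the copy is sent to moveToVALU when Score < 3.
static bool shouldConvertToVALU(unsigned SChainSize, unsigned NumSVCopies,
                                unsigned SiblingPenalty,
                                unsigned NumReadfirstlanes) {
  unsigned Penalty = NumSVCopies + SiblingPenalty + NumReadfirstlanes;
  unsigned Score = Penalty > SChainSize ? 0 : SChainSize - Penalty;
  return Score < 3;
}

// Example: a 32-bit copy feeding 5 SALU instructions, with one copy back to
// VALU and no siblings, scores 5 - (1 + 0 + 1) = 3 and so stays scalar.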
+ auto needProcessing = [](MachineInstr &MI) -> bool {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::WQM:
+ case AMDGPU::STRICT_WQM:
+ case AMDGPU::SOFT_WQM:
+ case AMDGPU::STRICT_WWM:
+ return true;
+ default:
+ return false;
+ }
+ };
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ MachineBasicBlock *MBB = &*BI;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr &MI = *I;
+ if (!needProcessing(MI))
+ continue;
+ if (lowerSpecialCase(MI))
+ continue;
+
+ // Compute the COPY width to pass to the V2SCopyInfo constructor
+ Register DstReg = MI.getOperand(0).getReg();
+
+ const TargetRegisterClass *DstRC = TRI->getRegClassForReg(*MRI, DstReg);
+
+ V2SCopyInfo In(getNextVGPRToSGPRCopyId(), &MI,
+ TRI->getRegSizeInBits(*DstRC));
+
+ SmallVector<MachineInstr *, 8> AnalysisWorklist;
+ // Needed because the SSA is not a tree but a graph and may have
+ // forks and joins. We should not visit the same node twice.
+ DenseSet<MachineInstr *> Visited;
+ AnalysisWorklist.push_back(&MI);
+ while (!AnalysisWorklist.empty()) {
+
+ MachineInstr *Inst = AnalysisWorklist.pop_back_val();
+
+ if (!Visited.insert(Inst).second)
+ continue;
+
+ // Copies and REG_SEQUENCE do not contribute to the final assembly,
+ // so skip them, but keep the SGPR to VGPR copy bookkeeping up to date.
+ if (Inst->isCopy() || Inst->isRegSequence()) {
+ if (TRI->isVGPR(*MRI, Inst->getOperand(0).getReg())) {
+ if (!Inst->isCopy() ||
+ !tryChangeVGPRtoSGPRinCopy(*Inst, TRI, TII)) {
+ In.NumSVCopies++;
+ continue;
+ }
+ }
+ }
+
+ SiblingPenalty[Inst].insert(In.ID);
+
+ SmallVector<MachineInstr *, 4> Users;
+ if ((TII->isSALU(*Inst) && Inst->isCompare()) ||
+ (Inst->isCopy() && Inst->getOperand(0).getReg() == AMDGPU::SCC)) {
+ auto I = Inst->getIterator();
+ auto E = Inst->getParent()->end();
+ while (++I != E && !I->findRegisterDefOperand(AMDGPU::SCC)) {
+ if (I->readsRegister(AMDGPU::SCC))
+ Users.push_back(&*I);
+ }
+ } else if (Inst->getNumExplicitDefs() != 0) {
+ Register Reg = Inst->getOperand(0).getReg();
+ if (TRI->isSGPRReg(*MRI, Reg))
+ for (auto &U : MRI->use_instructions(Reg))
+ Users.push_back(&U);
+ }
+ for (auto U : Users) {
+ if (TII->isSALU(*U))
+ In.SChain.insert(U);
+ AnalysisWorklist.push_back(U);
+ }
+ }
+ Copies[In.ID] = In;
+ }
+ }
+
+ SmallVector<unsigned, 8> LoweringWorklist;
+ for (auto &C : Copies) {
+ if (needToBeConvertedToVALU(&C.second))
+ LoweringWorklist.push_back(C.second.ID);
+ }
+
+ while (!LoweringWorklist.empty()) {
+ unsigned CurID = LoweringWorklist.pop_back_val();
+ auto CurInfoIt = Copies.find(CurID);
+ if (CurInfoIt != Copies.end()) {
+ V2SCopyInfo C = CurInfoIt->getSecond();
+ LLVM_DEBUG(dbgs() << "Processing ...\n"; C.dump());
+ for (auto S : C.Siblings) {
+ auto SibInfoIt = Copies.find(S);
+ if (SibInfoIt != Copies.end()) {
+ V2SCopyInfo &SI = SibInfoIt->getSecond();
+ LLVM_DEBUG(dbgs() << "Sibling:\n"; SI.dump());
+ if (!SI.NeedToBeConvertedToVALU) {
+ set_subtract(SI.SChain, C.SChain);
+ if (needToBeConvertedToVALU(&SI))
+ LoweringWorklist.push_back(SI.ID);
+ }
+ SI.Siblings.remove_if([&](unsigned ID) { return ID == C.ID; });
+ }
+ }
+ LLVM_DEBUG(dbgs() << "V2S copy " << *C.Copy
+ << " is being turned to VALU\n");
+ Copies.erase(C.ID);
+ TII->moveToVALU(*C.Copy, MDT);
+ }
+ }
+
+ // Now do actual lowering
+ for (auto C : Copies) {
+ MachineInstr *MI = C.second.Copy;
+ MachineBasicBlock *MBB = MI->getParent();
+ // We decided to turn this V2S copy into v_readfirstlane_b32;
+ // remove it from the V2S copies and from all its siblings.
+ LLVM_DEBUG(dbgs() << "V2S copy " << *MI
+ << " is being turned to v_readfirstlane_b32"
+ << " Score: " << C.second.Score << "\n");
+ Register DstReg = MI->getOperand(0).getReg();
+ Register SrcReg = MI->getOperand(1).getReg();
+ unsigned SubReg = MI->getOperand(1).getSubReg();
+ const TargetRegisterClass *SrcRC = TRI->getRegClassForReg(*MRI, SrcReg);
+ SrcRC = TRI->getSubRegClass(SrcRC, SubReg);
+ size_t SrcSize = TRI->getRegSizeInBits(*SrcRC);
+ if (SrcSize == 16) {
+ // HACK to handle a possible 16-bit VGPR source
+ auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg);
+ MIB.addReg(SrcReg, 0, AMDGPU::NoSubRegister);
+ } else if (SrcSize == 32) {
+ auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg);
+ MIB.addReg(SrcReg, 0, SubReg);
+ } else {
+ auto Result = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::REG_SEQUENCE), DstReg);
+ int N = TRI->getRegSizeInBits(*SrcRC) / 32;
+ for (int i = 0; i < N; i++) {
+ Register PartialSrc = TII->buildExtractSubReg(
+ Result, *MRI, MI->getOperand(1), SrcRC,
+ TRI->getSubRegFromChannel(i), &AMDGPU::VGPR_32RegClass);
+ Register PartialDst =
+ MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, *Result, Result->getDebugLoc(),
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), PartialDst)
+ .addReg(PartialSrc);
+ Result.addReg(PartialDst).addImm(TRI->getSubRegFromChannel(i));
+ }
+ }
+ MI->eraseFromParent();
+ }
+}
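The lowering loop above emits one v_readfirstlane_b32 per 32-bit slice of the source, with a 16-bit source handled as a single lane read and anything wider than 32 bits stitched back together by a REG_SEQUENCE. A trivial sketch of the instruction count per copy width (function name is illustrative):

// Number of V_READFIRSTLANE_B32 instructions needed for a VGPR->SGPR copy of
// the given source width in bits; widths above 32 also need a REG_SEQUENCE.
static unsigned numReadfirstlanes(unsigned SrcSizeInBits) {
  if (SrcSizeInBits <= 32) // covers the 16-bit VGPR source special case
    return 1;
  return SrcSizeInBits / 32;
}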
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d16da2a8b86b..438e8b200ecc 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1664,6 +1664,17 @@ SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
}
+SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG,
+ const SDLoc &SL) const {
+
+ Function &F = DAG.getMachineFunction().getFunction();
+ Optional<uint32_t> KnownSize =
+ AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
+ if (KnownSize.has_value())
+ return DAG.getConstant(KnownSize.value(), SL, MVT::i32);
+ return SDValue();
+}
+
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
@@ -2049,6 +2060,9 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
+
+ if (Info.hasLDSKernelId())
+ allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId);
}
// Allocate special inputs passed in user SGPRs.
@@ -2102,6 +2116,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ if (Info.hasLDSKernelId()) {
+ Register Reg = Info.addLDSKernelId();
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
}
@@ -2347,8 +2367,8 @@ SDValue SITargetLowering::LowerFormalArguments(
(!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
+ !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
+ !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
}
if (CallConv == CallingConv::AMDGPU_PS) {
@@ -2762,7 +2782,8 @@ void SITargetLowering::passSpecialInputs(
{AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,"amdgpu-no-workgroup-id-y"},
- {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"}
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"},
+ {AMDGPUFunctionArgInfo::LDS_KERNEL_ID,"amdgpu-no-lds-kernel-id"},
};
for (auto Attr : ImplicitAttrs) {
@@ -2798,6 +2819,13 @@ void SITargetLowering::passSpecialInputs(
// The implicit arg ptr is special because it doesn't have a corresponding
// input for kernels, and is computed from the kernarg segment pointer.
InputReg = getImplicitArgPtr(DAG, DL);
+ } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
+ Optional<uint32_t> Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
+ if (Id.has_value()) {
+ InputReg = DAG.getConstant(Id.value(), DL, ArgVT);
+ } else {
+ InputReg = DAG.getUNDEF(ArgVT);
+ }
} else {
// We may have proven the input wasn't needed, although the ABI is
// requiring it. We just need to allocate the register appropriately.
@@ -6887,6 +6915,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_workgroup_id_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_lds_kernel_id: {
+ if (MFI->isEntryFunction())
+ return getLDSKernelId(DAG, DL);
+ return getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
+ }
case Intrinsic::amdgcn_workitem_id_x:
return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 4fbccf0c5850..d1fecc1afc7f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -48,6 +48,7 @@ private:
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
+ SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
uint64_t Offset, Align Alignment,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index b398e108bf62..7c1d8d32b624 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -85,7 +85,7 @@ class InstSI <dag outs, dag ins, string asm = "",
field bit VOPAsmPrefer32Bit = 0;
// This bit indicates that this is a VOP3 opcode which supports op_sel
- // modifier (gfx9 only).
+ // modifier.
field bit VOP3_OPSEL = 0;
// Is it possible for this instruction to be atomic?
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 799d34e32d27..8916f06598c6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -108,8 +108,8 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx);
}
-bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const {
+bool SIInstrInfo::isReallyTriviallyReMaterializable(
+ const MachineInstr &MI) const {
if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) {
// Normally VALU use of exec would block the rematerialization, but that
// is OK in this case to have an implicit exec read as all VALU do.
@@ -220,16 +220,23 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
return false;
- assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
+ unsigned NumOps = getNumOperandsNoGlue(Load0);
+ if (NumOps != getNumOperandsNoGlue(Load1))
+ return false;
// Check base reg.
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ // Match register offsets, if both register and immediate offsets present.
+ assert(NumOps == 4 || NumOps == 5);
+ if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1))
+ return false;
+
const ConstantSDNode *Load0Offset =
- dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
const ConstantSDNode *Load1Offset =
- dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+ dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));
if (!Load0Offset || !Load1Offset)
return false;
@@ -5011,10 +5018,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
}
if (MO->isReg()) {
- if (!DefinedRC) {
- // This operand allows any register.
- return true;
- }
+ if (!DefinedRC)
+ return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN;
if (!isLegalRegOperand(MRI, OpInfo, *MO))
return false;
bool IsAGPR = RI.isAGPR(MRI, MO->getReg());
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1b411eb83eb3..5840f45bdc5a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -184,8 +184,7 @@ public:
return ST;
}
- bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
bool isIgnorableUse(const MachineOperand &MO) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 23afd6556bc9..81f8dcc482da 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -324,7 +324,8 @@ class isFloatType<ValueType SrcVT> {
// XXX - do v2i16 instructions?
class isIntType<ValueType SrcVT> {
- bit ret = !or(!eq(SrcVT.Value, i16.Value),
+ bit ret = !or(!eq(SrcVT.Value, i8.Value),
+ !eq(SrcVT.Value, i16.Value),
!eq(SrcVT.Value, i32.Value),
!eq(SrcVT.Value, i64.Value),
!eq(SrcVT.Value, v4i16.Value),
@@ -1411,6 +1412,10 @@ class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
+def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
+ let Name = "SDWAWithBin32InputMods";
+ let ParserMethod = "parseRegOrImm";
+}
class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
InputMods <matchClass> {
@@ -1419,6 +1424,7 @@ class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
+def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
def IntVRegInputModsMatchClass : AsmOperandClass {
let Name = "VRegWithIntInputMods";
@@ -1897,94 +1903,94 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
- dag ret = !if (!eq(NumSrcArgs, 0),
+ dag ret = !if(!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
(ins ),
- !if (!eq(NumSrcArgs, 1),
- !if (HasModifiers,
- // VOP1_DPP with modifiers
- (ins OldRC:$old, Src0Mod:$src0_modifiers,
- Src0RC:$src0)
- /* else */,
- // VOP1_DPP without modifiers
- (ins OldRC:$old, Src0RC:$src0)
- /* endif */),
- !if (!eq(NumSrcArgs, 2),
- !if (HasModifiers,
- // VOP2_DPP with modifiers
- (ins OldRC:$old,
- Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1)
- /* else */,
- // VOP2_DPP without modifiers
- (ins OldRC:$old,
- Src0RC:$src0, Src1RC:$src1)
- )
- /* NumSrcArgs == 3, VOP3 */,
- !if (HasModifiers,
- // VOP3_DPP with modifiers
- (ins OldRC:$old,
- Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2)
- /* else */,
- // VOP3_DPP without modifiers
- (ins OldRC:$old,
- Src0RC:$src0, Src1RC:$src1,
- Src2RC:$src2)
+ !con(
+ !if(HasOld ,(ins OldRC:$old), (ins)),
+ !if (!eq(NumSrcArgs, 1),
+ !if (HasModifiers,
+ // VOP1_DPP with modifiers
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0)
+ /* else */,
+ // VOP1_DPP without modifiers
+ (ins Src0RC:$src0)
+ /* endif */),
+ !if (!eq(NumSrcArgs, 2),
+ !if (HasModifiers,
+ // VOP2_DPP with modifiers
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1)
+ /* else */,
+ // VOP2_DPP without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1)
+ )
+ /* NumSrcArgs == 3, VOP3 */,
+ !if (HasModifiers,
+ // VOP3_DPP with modifiers
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2)
+ /* else */,
+ // VOP3_DPP without modifiers
+ (ins Src0RC:$src0, Src1RC:$src1,
+ Src2RC:$src2)
+ )
+ )
+ )
)
- /* endif */)));
+ );
}
class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
(ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}
class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
(ins FI:$fi));
}
class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
(ins dpp8:$dpp8, FI:$fi));
}
-class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
dag old = ( ins OldRC:$old );
dag base = VOP3Base;
dag ret = !con(
- !if(!ne(NumSrcArgs, 0), old, (ins)),
+ !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)),
base
);
}
-class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
- dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
+ dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
(ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}
-class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
- dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret,
+class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
+ dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
(ins FI:$fi));
}
-class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
- dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
+ dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
(ins dpp8:$dpp8, FI:$fi));
}
@@ -2665,6 +2671,8 @@ def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
+def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>;
+def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>;
def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
@@ -2672,6 +2680,8 @@ def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
+def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>;
+def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>;
class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0504c59ebd9e..9176e85568ee 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -44,6 +44,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDY(false),
WorkGroupIDZ(false),
WorkGroupInfo(false),
+ LDSKernelId(false),
PrivateSegmentWaveByteOffset(false),
WorkItemIDX(false),
WorkItemIDY(false),
@@ -143,6 +144,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
DispatchID = true;
+
+ if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
+ LDSKernelId = true;
}
// FIXME: This attribute is a hack, we just need an analysis on the function
@@ -261,6 +265,12 @@ Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI)
return ArgInfo.ImplicitBufferPtr.getRegister();
}
+Register SIMachineFunctionInfo::addLDSKernelId() {
+ ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
+ NumUserSGPRs += 1;
+ return ArgInfo.LDSKernelId.getRegister();
+}
+
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
MCPhysReg Reg) {
for (unsigned I = 0; CSRegs[I]; ++I) {
@@ -561,6 +571,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
+ Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index bebb13cbf09f..5105587617fd 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -191,6 +191,7 @@ struct SIArgumentInfo {
Optional<SIArgument> WorkGroupIDY;
Optional<SIArgument> WorkGroupIDZ;
Optional<SIArgument> WorkGroupInfo;
+ Optional<SIArgument> LDSKernelId;
Optional<SIArgument> PrivateSegmentWaveByteOffset;
Optional<SIArgument> ImplicitArgPtr;
@@ -215,6 +216,7 @@ template <> struct MappingTraits<SIArgumentInfo> {
YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
+ YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
YamlIO.mapOptional("privateSegmentWaveByteOffset",
AI.PrivateSegmentWaveByteOffset);
@@ -418,6 +420,7 @@ private:
bool WorkGroupIDY : 1;
bool WorkGroupIDZ : 1;
bool WorkGroupInfo : 1;
+ bool LDSKernelId : 1;
bool PrivateSegmentWaveByteOffset : 1;
bool WorkItemIDX : 1; // Always initialized.
@@ -608,6 +611,7 @@ public:
Register addDispatchID(const SIRegisterInfo &TRI);
Register addFlatScratchInit(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
+ Register addLDSKernelId();
/// Increment user SGPRs used for padding the argument list only.
Register addReservedUserSGPR() {
@@ -705,6 +709,8 @@ public:
return WorkGroupInfo;
}
+ bool hasLDSKernelId() const { return LDSKernelId; }
+
bool hasPrivateSegmentWaveByteOffset() const {
return PrivateSegmentWaveByteOffset;
}
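As a rough illustration of the user-SGPR bookkeeping behind addLDSKernelId() above: each preloaded kernel argument claims the next consecutive user SGPR and bumps NumUserSGPRs. The sketch below is not code from this patch; ArgDesc and UserSGPRAllocator are invented stand-ins for ArgDescriptor and SIMachineFunctionInfo.

#include <iostream>
#include <optional>

// Invented stand-in for an argument descriptor: which user SGPR index holds it.
struct ArgDesc {
  std::optional<unsigned> Reg;
};

// Toy allocator mirroring the "next user SGPR" pattern: every preloaded
// argument (dispatch id, LDS kernel id, ...) takes the next free index and
// increments the running count, just as addLDSKernelId() does.
class UserSGPRAllocator {
  unsigned NumUserSGPRs = 0;

public:
  unsigned addArg(ArgDesc &A) {
    A.Reg = NumUserSGPRs; // next free user SGPR
    return NumUserSGPRs++;
  }
  unsigned numUsed() const { return NumUserSGPRs; }
};

int main() {
  UserSGPRAllocator Alloc;
  ArgDesc DispatchID, LDSKernelId;
  Alloc.addArg(DispatchID);
  std::cout << "lds kernel id in user SGPR #" << Alloc.addArg(LDSKernelId)
            << ", total user SGPRs: " << Alloc.numUsed() << "\n";
}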
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 66bc46aaefea..19a83ad53e2e 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -12,6 +12,8 @@
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -26,6 +28,10 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
const MachineRegisterInfo *MRI = nullptr;
+ MCRegister Exec;
+
+ DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
+ SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors;
Register isCopyFromExec(const MachineInstr &MI) const;
Register isCopyToExec(const MachineInstr &MI) const;
@@ -44,13 +50,13 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
std::function<bool(MachineInstr *)> Pred,
ArrayRef<MCRegister> NonModifiableRegs,
unsigned MaxInstructions = 20) const;
- MachineInstr *findPossibleVCMPVCMPXOptimization(MachineInstr &SaveExec,
- MCRegister Exec) const;
- bool optimizeExecSequence() const;
- bool optimizeVCmpxAndSaveexecSequence() const;
- bool optimizeSingleVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
- MachineInstr &VCmp,
- MCRegister Exec) const;
+ bool optimizeExecSequence();
+ void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI);
+ bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr,
+ MachineInstr &VCmp, MCRegister Exec) const;
+
+ void tryRecordOrSaveexecXorSequence(MachineInstr &MI);
+ bool optimizeOrSaveexecXorSequences();
public:
static char ID;
@@ -92,7 +98,7 @@ Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const {
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B32_term: {
const MachineOperand &Src = MI.getOperand(1);
- if (Src.isReg() && Src.getReg() == TRI->getExec())
+ if (Src.isReg() && Src.getReg() == Exec)
return MI.getOperand(0).getReg();
}
}
@@ -107,8 +113,7 @@ Register SIOptimizeExecMasking::isCopyToExec(const MachineInstr &MI) const {
case AMDGPU::S_MOV_B64:
case AMDGPU::S_MOV_B32: {
const MachineOperand &Dst = MI.getOperand(0);
- if (Dst.isReg() && Dst.getReg() == TRI->getExec() &&
- MI.getOperand(1).isReg())
+ if (Dst.isReg() && Dst.getReg() == Exec && MI.getOperand(1).isReg())
return MI.getOperand(1).getReg();
break;
}
@@ -394,9 +399,7 @@ bool SIOptimizeExecMasking::isRegisterInUseAfter(MachineInstr &Stop,
// =>
// x = s_<op>_saveexec_b64 y
//
-bool SIOptimizeExecMasking::optimizeExecSequence() const {
- MCRegister Exec = TRI->getExec();
-
+bool SIOptimizeExecMasking::optimizeExecSequence() {
bool Changed = false;
for (MachineBasicBlock &MBB : *MF) {
MachineBasicBlock::reverse_iterator I = fixTerminators(MBB);
@@ -551,88 +554,9 @@ bool SIOptimizeExecMasking::optimizeExecSequence() const {
return Changed;
}
-// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence
-// by looking at an instance of a s_and_saveexec instruction. Returns a pointer
-// to the v_cmp instruction if it is safe to replace the sequence (see the
-// conditions in the function body). This is after register allocation, so some
-// checks on operand dependencies need to be considered.
-MachineInstr *SIOptimizeExecMasking::findPossibleVCMPVCMPXOptimization(
- MachineInstr &SaveExec, MCRegister Exec) const {
-
- MachineInstr *VCmp = nullptr;
-
- Register SaveExecDest = SaveExec.getOperand(0).getReg();
- if (!TRI->isSGPRReg(*MRI, SaveExecDest))
- return nullptr;
-
- MachineOperand *SaveExecSrc0 =
- TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0);
- if (!SaveExecSrc0->isReg())
- return nullptr;
-
- // Try to find the last v_cmp instruction that defs the saveexec input
- // operand without any write to Exec or the saveexec input operand inbetween.
- VCmp = findInstrBackwards(
- SaveExec,
- [&](MachineInstr *Check) {
- return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 &&
- Check->modifiesRegister(SaveExecSrc0->getReg(), TRI);
- },
- {Exec, SaveExecSrc0->getReg()});
-
- if (!VCmp)
- return nullptr;
-
- MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst);
- assert(VCmpDest && "Should have an sdst operand!");
-
- // Check if any of the v_cmp source operands is written by the saveexec.
- MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0);
- if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) &&
- SaveExec.modifiesRegister(Src0->getReg(), TRI))
- return nullptr;
-
- MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1);
- if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) &&
- SaveExec.modifiesRegister(Src1->getReg(), TRI))
- return nullptr;
-
- // Don't do the transformation if the destination operand is included in
- // it's MBB Live-outs, meaning it's used in any of it's successors, leading
- // to incorrect code if the v_cmp and therefore the def of
- // the dest operand is removed.
- if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg()))
- return nullptr;
-
- // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the
- // s_and_saveexec, skip the optimization.
- if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), false,
- true) ||
- isRegisterInUseAfter(SaveExec, VCmpDest->getReg()))
- return nullptr;
-
- // Try to determine if there is a write to any of the VCmp
- // operands between the saveexec and the vcmp.
- // If yes, additional VGPR spilling might need to be inserted. In this case,
- // it's not worth replacing the instruction sequence.
- SmallVector<MCRegister, 2> NonDefRegs;
- if (Src0->isReg())
- NonDefRegs.push_back(Src0->getReg());
-
- if (Src1->isReg())
- NonDefRegs.push_back(Src1->getReg());
-
- if (!findInstrBackwards(
- SaveExec, [&](MachineInstr *Check) { return Check == VCmp; },
- NonDefRegs))
- return nullptr;
-
- return VCmp;
-}
-
// Inserts the optimized s_mov_b32 / v_cmpx sequence based on the
// operands extracted from a v_cmp ..., s_and_saveexec pattern.
-bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence(
+bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence(
MachineInstr &SaveExecInstr, MachineInstr &VCmp, MCRegister Exec) const {
const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode());
@@ -678,50 +602,164 @@ bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence(
if (Src1->isReg())
MRI->clearKillFlags(Src1->getReg());
+ SaveExecInstr.eraseFromParent();
+ VCmp.eraseFromParent();
+
return true;
}
-// After all s_op_saveexec instructions are inserted,
-// replace (on GFX10.3 and later)
+// Record (on GFX10.3 and later) occurrences of
// v_cmp_* SGPR, IMM, VGPR
// s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR
-// with
+// to be replaced with
// s_mov_b32 EXEC_SGPR_DEST, exec_lo
// v_cmpx_* IMM, VGPR
// to reduce pipeline stalls.
-bool SIOptimizeExecMasking::optimizeVCmpxAndSaveexecSequence() const {
+void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
+ MachineInstr &MI) {
if (!ST->hasGFX10_3Insts())
- return false;
+ return;
- bool Changed = false;
-
- DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping;
- MCRegister Exec = TRI->getExec();
const unsigned AndSaveExecOpcode =
ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
- for (MachineBasicBlock &MBB : *MF) {
- for (MachineInstr &MI : MBB) {
- // Record relevant v_cmp / s_and_saveexec instruction pairs for
- // replacement.
- if (MI.getOpcode() != AndSaveExecOpcode)
- continue;
+ if (MI.getOpcode() != AndSaveExecOpcode)
+ return;
+
+ Register SaveExecDest = MI.getOperand(0).getReg();
+ if (!TRI->isSGPRReg(*MRI, SaveExecDest))
+ return;
- if (MachineInstr *VCmp = findPossibleVCMPVCMPXOptimization(MI, Exec))
- SaveExecVCmpMapping[&MI] = VCmp;
+ MachineOperand *SaveExecSrc0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ if (!SaveExecSrc0->isReg())
+ return;
+
+ // Check whether a v_cmp ..., s_and_saveexec sequence rooted at this
+ // s_and_saveexec instruction can safely be replaced (see the conditions
+ // below) and, if so, record the pair in SaveExecVCmpMapping for later
+ // rewriting. This runs after register allocation, so some checks on
+ // operand dependencies need to be considered.
+ MachineInstr *VCmp = nullptr;
+
+ // Try to find the last v_cmp instruction that defs the saveexec input
+ // operand without any write to Exec or the saveexec input operand in between.
+ VCmp = findInstrBackwards(
+ MI,
+ [&](MachineInstr *Check) {
+ return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 &&
+ Check->modifiesRegister(SaveExecSrc0->getReg(), TRI);
+ },
+ {Exec, SaveExecSrc0->getReg()});
+
+ if (!VCmp)
+ return;
+
+ MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst);
+ assert(VCmpDest && "Should have an sdst operand!");
+
+ // Check if any of the v_cmp source operands is written by the saveexec.
+ MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0);
+ if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) &&
+ MI.modifiesRegister(Src0->getReg(), TRI))
+ return;
+
+ MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1);
+ if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) &&
+ MI.modifiesRegister(Src1->getReg(), TRI))
+ return;
+
+ // Don't do the transformation if the destination operand is included in
+ // its MBB's live-outs, meaning it is used in any of its successors, which
+ // would lead to incorrect code if the v_cmp, and therefore the def of the
+ // dest operand, were removed.
+ if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg()))
+ return;
+
+ // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the
+ // s_and_saveexec, skip the optimization.
+ if (isRegisterInUseBetween(*VCmp, MI, VCmpDest->getReg(), false, true) ||
+ isRegisterInUseAfter(MI, VCmpDest->getReg()))
+ return;
+
+ // Try to determine if there is a write to any of the VCmp
+ // operands between the saveexec and the vcmp.
+ // If yes, additional VGPR spilling might need to be inserted. In this case,
+ // it's not worth replacing the instruction sequence.
+ SmallVector<MCRegister, 2> NonDefRegs;
+ if (Src0->isReg())
+ NonDefRegs.push_back(Src0->getReg());
+
+ if (Src1->isReg())
+ NonDefRegs.push_back(Src1->getReg());
+
+ if (!findInstrBackwards(
+ MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
+ return;
+
+ // VCmp is known to be non-null here; record the pair for later rewriting.
+ SaveExecVCmpMapping[&MI] = VCmp;
+}
+
+// Record occurrences of
+// s_or_saveexec s_o, s_i
+// s_xor exec, exec, s_o
+// to be replaced with
+// s_andn2_saveexec s_o, s_i.
+void SIOptimizeExecMasking::tryRecordOrSaveexecXorSequence(MachineInstr &MI) {
+ const unsigned XorOpcode =
+ ST->isWave32() ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
+
+ if (MI.getOpcode() == XorOpcode && &MI != &MI.getParent()->front()) {
+ const MachineOperand &XorDst = MI.getOperand(0);
+ const MachineOperand &XorSrc0 = MI.getOperand(1);
+ const MachineOperand &XorSrc1 = MI.getOperand(2);
+
+ if (XorDst.isReg() && XorDst.getReg() == Exec && XorSrc0.isReg() &&
+ XorSrc1.isReg() &&
+ (XorSrc0.getReg() == Exec || XorSrc1.getReg() == Exec)) {
+ const unsigned OrSaveexecOpcode = ST->isWave32()
+ ? AMDGPU::S_OR_SAVEEXEC_B32
+ : AMDGPU::S_OR_SAVEEXEC_B64;
+
+ // Peek at the previous instruction and check if this is a relevant
+ // s_or_saveexec instruction.
+ MachineInstr &PossibleOrSaveexec = *MI.getPrevNode();
+ if (PossibleOrSaveexec.getOpcode() != OrSaveexecOpcode)
+ return;
+
+ const MachineOperand &OrDst = PossibleOrSaveexec.getOperand(0);
+ const MachineOperand &OrSrc0 = PossibleOrSaveexec.getOperand(1);
+ if (OrDst.isReg() && OrSrc0.isReg()) {
+ if ((XorSrc0.getReg() == Exec && XorSrc1.getReg() == OrDst.getReg()) ||
+ (XorSrc0.getReg() == OrDst.getReg() && XorSrc1.getReg() == Exec)) {
+ OrXors.emplace_back(&PossibleOrSaveexec, &MI);
+ }
+ }
}
}
+}
- for (const auto &Entry : SaveExecVCmpMapping) {
- MachineInstr *SaveExecInstr = Entry.getFirst();
- MachineInstr *VCmpInstr = Entry.getSecond();
+bool SIOptimizeExecMasking::optimizeOrSaveexecXorSequences() {
+ if (OrXors.empty()) {
+ return false;
+ }
- if (optimizeSingleVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec)) {
- SaveExecInstr->eraseFromParent();
- VCmpInstr->eraseFromParent();
+ bool Changed = false;
+ const unsigned Andn2Opcode = ST->isWave32() ? AMDGPU::S_ANDN2_SAVEEXEC_B32
+ : AMDGPU::S_ANDN2_SAVEEXEC_B64;
- Changed = true;
- }
+ for (const auto &Pair : OrXors) {
+ MachineInstr *Or = nullptr;
+ MachineInstr *Xor = nullptr;
+ std::tie(Or, Xor) = Pair;
+ BuildMI(*Or->getParent(), Or->getIterator(), Or->getDebugLoc(),
+ TII->get(Andn2Opcode), Or->getOperand(0).getReg())
+ .addReg(Or->getOperand(1).getReg());
+
+ Or->eraseFromParent();
+ Xor->eraseFromParent();
+
+ Changed = true;
}
return Changed;
@@ -736,9 +774,42 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
TRI = ST->getRegisterInfo();
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
+ Exec = TRI->getExec();
bool Changed = optimizeExecSequence();
- Changed |= optimizeVCmpxAndSaveexecSequence();
+
+ OrXors.clear();
+ SaveExecVCmpMapping.clear();
+ static unsigned SearchWindow = 10;
+ for (MachineBasicBlock &MBB : MF) {
+ unsigned SearchCount = 0;
+
+ for (auto &MI : llvm::reverse(MBB)) {
+ if (MI.isDebugInstr())
+ continue;
+
+ if (SearchCount >= SearchWindow) {
+ break;
+ }
+
+ tryRecordOrSaveexecXorSequence(MI);
+ tryRecordVCmpxAndSaveexecSequence(MI);
+
+ if (MI.modifiesRegister(Exec, TRI)) {
+ break;
+ }
+
+ ++SearchCount;
+ }
+ }
+
+ Changed |= optimizeOrSaveexecXorSequences();
+ for (const auto &Entry : SaveExecVCmpMapping) {
+ MachineInstr *SaveExecInstr = Entry.getFirst();
+ MachineInstr *VCmpInstr = Entry.getSecond();
+
+ Changed |= optimizeVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec);
+ }
return Changed;
}
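The rewritten runOnMachineFunction above scans each block bottom-up inside a fixed window, skips debug instructions, records candidate pairs, and stops at any write to EXEC. The toy model below is not part of the patch and covers only the s_or_saveexec/s_xor recording; Inst and Op are invented stand-ins for MachineInstr and real opcodes.

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

enum class Op { Other, Debug, OrSaveexec, XorExec, WritesExec };

struct Inst {
  Op Kind;
};

static bool writesExec(Op K) {
  // Both the s_xor we match and any unrelated EXEC def end the scan.
  return K == Op::XorExec || K == Op::WritesExec;
}

// Bottom-up, bounded scan of one block: skip debug instructions, record
// s_or_saveexec / s_xor-with-exec pairs for later rewriting, and stop at any
// instruction that redefines EXEC or when the search window is exhausted.
static std::vector<std::pair<size_t, size_t>>
collectOrXorPairs(const std::vector<Inst> &Block, unsigned Window = 10) {
  std::vector<std::pair<size_t, size_t>> Pairs;
  unsigned Count = 0;
  for (size_t I = Block.size(); I-- > 0;) {
    const Inst &MI = Block[I];
    if (MI.Kind == Op::Debug)
      continue; // debug instructions neither match nor consume the window
    if (Count >= Window)
      break;
    if (MI.Kind == Op::XorExec && I > 0 &&
        Block[I - 1].Kind == Op::OrSaveexec)
      Pairs.emplace_back(I - 1, I); // candidate for s_andn2_saveexec
    if (writesExec(MI.Kind))
      break;
    ++Count;
  }
  return Pairs;
}

int main() {
  std::vector<Inst> Block = {
      {Op::Other}, {Op::OrSaveexec}, {Op::XorExec}, {Op::Debug}, {Op::Other}};
  for (auto [OrIdx, XorIdx] : collectOrXorPairs(Block))
    std::cout << "or/xor pair at " << OrIdx << "," << XorIdx << "\n";
}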
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 57dbad468de8..aed84437b890 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -184,6 +184,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
return false;
+ // Cannot safely mirror live intervals with PHI nodes, so check for these
+ // before optimization.
+ SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+ LiveInterval *SelLI = &LIS->getInterval(SelReg);
+ if (llvm::any_of(SelLI->vnis(),
+ [](const VNInfo *VNI) {
+ return VNI->isPHIDef();
+ }))
+ return false;
+
// TODO: Guard against implicit def operands?
LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
<< *And);
@@ -204,31 +214,34 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
- SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
- SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
-
- LiveInterval *CmpLI =
- CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
- LiveInterval *SelLI =
- SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
-
// Update live intervals for CCReg before potentially removing CmpReg/SelReg,
// and their associated liveness information.
+ SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
if (CCReg.isVirtual()) {
- // Note: this ignores that SelLI might have multiple internal values
- // or splits and simply extends the live range to cover all cases
- // where the result of the v_cndmask_b32 was live (e.g. loops).
- // This could yield worse register allocation in rare edge cases.
- SlotIndex EndIdx = AndIdx.getRegSlot();
- if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
- EndIdx = SelLI->endIndex();
+ // Apply the live ranges from SelLI to CCReg, matching any splits and
+ // extending to loop boundaries where required.
+
+ auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) {
+ // Copy live ranges from SelLI, adjusting start and end as required.
+ auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
+ assert(DefSegment != SelLI->end() &&
+ "No live interval segment covering definition?");
+ for (auto I = DefSegment; I != SelLI->end(); ++I) {
+ SlotIndex Start = I->start < SelIdx.getRegSlot() ?
+ SelIdx.getRegSlot() : I->start;
+ SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
+ I->end : AndIdx.getRegSlot();
+ Dst.addSegment(LiveRange::Segment(Start, End, VNI));
+ }
+ // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
+ if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
+ Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+ };
LiveInterval &CCLI = LIS->getInterval(CCReg);
auto CCQ = CCLI.Query(SelIdx.getRegSlot());
- if (CCQ.valueIn()) {
- CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
- EndIdx, CCQ.valueIn()));
- }
+ if (CCQ.valueIn())
+ applyLiveRanges(CCLI, CCQ.valueIn());
if (CC->getSubReg()) {
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
@@ -237,10 +250,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
Allocator, Mask,
[=](LiveInterval::SubRange &SR) {
auto CCQS = SR.Query(SelIdx.getRegSlot());
- if (CCQS.valueIn()) {
- SR.addSegment(LiveRange::Segment(
- SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
- }
+ if (CCQS.valueIn())
+ applyLiveRanges(SR, CCQS.valueIn());
},
*LIS->getSlotIndexes(), *TRI);
CCLI.removeEmptySubRanges();
@@ -253,7 +264,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
// Try to remove compare. Cmp value should not used in between of cmp
// and s_and_b64 if VCC or just unused if any other register.
- if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
+ LiveInterval *CmpLI = CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+ if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
(CmpReg == Register(CondReg) &&
std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
[&](const MachineInstr &MI) {
@@ -266,18 +278,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
Cmp->eraseFromParent();
// Try to remove v_cndmask_b32.
- if (SelLI) {
- // Kill status must be checked before shrinking the live range.
- bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
- LIS->shrinkToUses(SelLI);
- bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
- if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
- LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
-
- LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
- LIS->RemoveMachineInstrFromMaps(*Sel);
- Sel->eraseFromParent();
- }
+ // Kill status must be checked before shrinking the live range.
+ bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+ LIS->shrinkToUses(SelLI);
+ bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+ if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+
+ LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
+ LIS->RemoveMachineInstrFromMaps(*Sel);
+ Sel->eraseFromParent();
}
}
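The applyLiveRanges lambda above copies SelReg's live segments onto CCReg, clamping the first segment's start to the v_cndmask def and clamping ends that run past the s_and back to it unless they stop at a block boundary. Below is a simplified numeric model of just that clamping step, not part of the patch: plain integers stand in for SlotIndex, value numbers and subranges are ignored, and the extra "extend from CmpIdx when SelLI does not cover AndIdx" case is omitted.

#include <algorithm>
#include <iostream>
#include <vector>

// Half-open live segment [Start, End); EndIsBlock marks ends that fall on a
// basic-block boundary and therefore must not be shortened.
struct Segment {
  int Start, End;
  bool EndIsBlock;
};

// Copy the segments of Src that are live at or after DefIdx into a new range,
// clamping the start up to DefIdx and clamping ends that run past UseIdx back
// to UseIdx unless they stop at a block boundary.
static std::vector<Segment> copyClamped(const std::vector<Segment> &Src,
                                        int DefIdx, int UseIdx) {
  std::vector<Segment> Dst;
  for (const Segment &S : Src) {
    if (S.End <= DefIdx)
      continue; // entirely before the new definition point
    int Start = std::max(S.Start, DefIdx);
    int End = (S.End < UseIdx || S.EndIsBlock) ? S.End : UseIdx;
    Dst.push_back({Start, End, S.EndIsBlock});
  }
  return Dst;
}

int main() {
  // One segment split before the use, one running to a block boundary.
  std::vector<Segment> Sel = {{0, 8, false}, {12, 20, true}};
  for (const Segment &S : copyClamped(Sel, /*DefIdx=*/4, /*UseIdx=*/16))
    std::cout << "[" << S.Start << "," << S.End << ")\n"; // [4,8) then [12,20)
}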
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index b13afceba20e..553fb4cf496c 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -49,6 +49,8 @@ struct SIProgramInfo {
uint32_t AccumOffset = 0;
uint32_t TgSplit = 0;
uint32_t NumSGPR = 0;
+ unsigned SGPRSpill = 0;
+ unsigned VGPRSpill = 0;
uint32_t LDSSize = 0;
bool FlatUsed = false;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 882d13402a19..b7e8eadfe71d 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -119,13 +119,19 @@ class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass,
let PseudoInstr = opName # variant;
}
-class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]>
- : SM_Pseudo<opName, outs, ins, asmOps, pattern> {
- RegisterClass BaseClass;
+class SM_Load_Pseudo <string opName, RegisterClass baseClass,
+ RegisterClass dstClass, OffsetMode offsets>
+ : SM_Pseudo<opName, (outs dstClass:$sdst),
+ !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
+ " $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
+ RegisterClass BaseClass = baseClass;
let mayLoad = 1;
let mayStore = 0;
let has_glc = 1;
let has_dlc = 1;
+ let has_offset = offsets.HasOffset;
+ let has_soffset = offsets.HasSOffset;
+ let PseudoInstr = opName # offsets.Variant;
}
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
@@ -158,40 +164,9 @@ class SM_Discard_Pseudo <string opName, string variant, dag offsets,
multiclass SM_Pseudo_Loads<string opName,
RegisterClass baseClass,
RegisterClass dstClass> {
- def _IMM : SM_Load_Pseudo <opName,
- (outs dstClass:$sdst),
- (ins baseClass:$sbase, i32imm:$offset, CPol:$cpol),
- " $sdst, $sbase, $offset$cpol", []> {
- let has_offset = 1;
- let BaseClass = baseClass;
- let PseudoInstr = opName # "_IMM";
- let has_glc = 1;
- let has_dlc = 1;
- }
-
- def _SGPR : SM_Load_Pseudo <opName,
- (outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
- " $sdst, $sbase, $soffset$cpol", []> {
- let has_soffset = 1;
- let BaseClass = baseClass;
- let PseudoInstr = opName # "_SGPR";
- let has_glc = 1;
- let has_dlc = 1;
- }
-
- def _SGPR_IMM : SM_Load_Pseudo <opName,
- (outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soffset,
- i32imm:$offset, CPol:$cpol),
- " $sdst, $sbase, $soffset$offset$cpol", []> {
- let has_offset = 1;
- let has_soffset = 1;
- let BaseClass = baseClass;
- let PseudoInstr = opName # "_SGPR_IMM";
- let has_glc = 1;
- let has_dlc = 1;
- }
+ def _IMM : SM_Load_Pseudo <opName, baseClass, dstClass, IMM_Offset>;
+ def _SGPR : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_Offset>;
+ def _SGPR_IMM : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_IMM_Offset>;
}
multiclass SM_Pseudo_Stores<string opName,
@@ -596,10 +571,10 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
soffset{6-0}, ?);
}
-class SMEM_Real_Load_vi<bits<8> op, string ps, dag offsets>
- : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)> {
- RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass;
- let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol));
+class SMEM_Real_Load_vi<bits<8> op, string ps, OffsetMode offsets>
+ : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
+ let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
}
// The alternative GFX9 SGPR encoding using soffset to encode the
@@ -614,14 +589,12 @@ class SMEM_Real_SGPR_alt_gfx9 {
}
multiclass SM_Real_Loads_vi<bits<8> op, string ps> {
- def _IMM_vi : SMEM_Real_Load_vi <op, ps#"_IMM", (ins smem_offset:$offset)>;
- def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR", (ins SReg_32:$soffset)>;
- def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR",
- (ins SReg_32:$soffset)>,
+ def _IMM_vi : SMEM_Real_Load_vi <op, ps, IMM_Offset>;
+ def _SGPR_vi : SMEM_Real_Load_vi <op, ps, SGPR_Offset>;
+ def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_Offset>,
SMEM_Real_SGPR_alt_gfx9;
let IsGFX9SpecificEncoding = true in
- def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <
- op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>;
+ def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_IMM_Offset>;
}
class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
@@ -883,6 +856,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL
def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
+def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
@@ -903,11 +877,18 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 3. SGPR offset
def : GCNPat <
- (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))
>;
- // 4. No offset
+ // 4. SGPR+IMM offset
+ def : GCNPat <
+ (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
+ let OtherPredicates = [isGFX9Plus];
+ }
+
+ // 5. No offset
def : GCNPat <
(vt (smrd_load (i64 SReg_64:$sbase))),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
@@ -1021,19 +1002,16 @@ class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
}
-multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
- SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
- SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
- def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
- }
- def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
- }
- def _SGPR_IMM_gfx10 : SMEM_Real_gfx10<op, !cast<SM_Load_Pseudo>(ps#_SGPR_IMM)> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset,
- smem_offset_mod:$offset, CPol:$cpol);
- }
+class SMEM_Real_Load_gfx10<bits<8> op, string ps, OffsetMode offsets>
+ : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
+ let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
+}
+
+multiclass SM_Real_Loads_gfx10<bits<8> op, string ps> {
+ def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, IMM_Offset>;
+ def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_Offset>;
+ def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_IMM_Offset>;
}
class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
@@ -1227,17 +1205,16 @@ class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
}
-class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, dag offsets> :
- SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName> {
- RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass;
- let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol));
+class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, OffsetMode offsets> :
+ SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
+ let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
}
multiclass SM_Real_Loads_gfx11<bits<8> op, string ps, string opName> {
- def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName, (ins smem_offset:$offset)>;
- def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName, (ins SReg_32:$soffset)>;
- def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<
- op, ps#"_SGPR_IMM", opName, (ins SReg_32:$soffset, smem_offset_mod:$offset)>;
+ def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, IMM_Offset>;
+ def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_Offset>;
+ def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_IMM_Offset>;
def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>,
Requires<[isGFX11Plus]>;
}
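The SM_Pseudo_Loads change above folds three hand-written _IMM/_SGPR/_SGPR_IMM pseudo definitions into one SM_Load_Pseudo class driven by an OffsetMode descriptor. A loose C++ analogy of that refactor, not the TableGen itself and with made-up field names, looks like this:

#include <iostream>
#include <string>
#include <vector>

// Analogue of the OffsetMode descriptor: one record per addressing variant
// instead of three copy-pasted pseudo definitions.
struct OffsetMode {
  std::string Variant; // "_IMM", "_SGPR", "_SGPR_IMM"
  bool HasOffset;
  bool HasSOffset;
};

struct LoadPseudo {
  std::string Name; // opName # Variant
  bool HasOffset, HasSOffset;
};

// One "class" builds every variant; adding a new addressing mode means adding
// one descriptor, not another near-identical definition.
static std::vector<LoadPseudo> makeLoads(const std::string &OpName) {
  static const OffsetMode Modes[] = {{"_IMM", true, false},
                                     {"_SGPR", false, true},
                                     {"_SGPR_IMM", true, true}};
  std::vector<LoadPseudo> Out;
  for (const OffsetMode &M : Modes)
    Out.push_back({OpName + M.Variant, M.HasOffset, M.HasSOffset});
  return Out;
}

int main() {
  for (const LoadPseudo &P : makeLoads("S_LOAD_DWORD"))
    std::cout << P.Name << " offset=" << P.HasOffset
              << " soffset=" << P.HasSOffset << "\n";
}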
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 2f334e211181..b5fb390c08e1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -417,9 +417,9 @@ bool getMAIIsGFX940XDL(unsigned Opc) {
CanBeVOPD getCanBeVOPD(unsigned Opc) {
const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
if (Info)
- return {Info->CanBeVOPDX, 1};
+ return {Info->CanBeVOPDX, true};
else
- return {0, 0};
+ return {false, false};
}
unsigned getVOPDOpcode(unsigned Opc) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index 65ed02ca62de..a2d59abd3abb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -30,7 +30,7 @@ namespace AMDGPU {
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
std::vector<GlobalVariable *> findVariablesToLower(Module &M,
- const Function *F = nullptr);
+ const Function *F);
/// Replace all uses of constant \p C with instructions in \p F.
void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1d374a9f90ba..73e4eb8cdc24 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -499,6 +499,59 @@ let SubtargetPredicate = isGFX9Only in {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only
+class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
+ let HasExtSDWA = 1;
+ let HasExtSDWA9 = 1;
+ let HasExt = 1;
+ let DstRCSDWA = getVALUDstForVT<vt>.ret;
+ let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
+ clampmod:$clamp, omod:$omod, src0_sel:$src0_sel);
+ let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
+ let AsmSDWA9 = AsmSDWA;
+ let EmitDstSel = 0;
+}
+
+def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
+def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;
+
+let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
+ defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
+ defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
+ defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
+}
+
+class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
+ VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
+ (f32 (node i32:$src, index)),
+ !if (index,
+ (inst_sdwa 0, $src, 0, 0, index),
+ (inst_e32 $src))
+>;
+
+foreach Index = [0, 1, 2, 3] in {
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
+ V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
+ def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
+ V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
+}
+
+class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
+ VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
+ (v2f32 (node i32:$src, index)),
+ !if (index,
+ (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
+ (inst_e32 $src))
+>;
+
+foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+}
+
let SubtargetPredicate = isGFX10Plus in {
defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;
@@ -1106,11 +1159,36 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
}
+multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
+ let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
+ defm NAME : VOP1_Real_e32e64_vi <op>;
+ }
+
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ def _sdwa_gfx9 :
+ VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
+ let Inst{42-40} = 6;
+ }
+
+ foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ def _dpp_gfx9 :
+ VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
+ VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
+}
+
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
+let OtherPredicates = [HasFP8Insts] in {
+defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
+defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
+defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
+defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
+}
+
//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index dddd0aacc140..a911483cade5 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -481,6 +481,30 @@ def shl_0_to_4 : PatFrag<
}];
}
+def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
+ let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC64:$src1,
+ VGPR_32:$vdst_in, op_sel0:$op_sel);
+ let HasClamp = 0;
+ let HasExtVOP3DPP = 0;
+}
+
+def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
+ VOP3_OPSEL> {
+ let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
+ FP32InputMods:$src1_modifiers, Src1RC64:$src1,
+ FP32InputMods:$src2_modifiers, VGPR_32:$src2,
+ op_sel0:$op_sel);
+ let HasClamp = 0;
+ let HasSrc2 = 0;
+ let HasSrc2Mods = 1;
+ let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
+ getAsmVOP3OpSel<3, HasClamp,
+ HasSrc0FloatMods, HasSrc1FloatMods,
+ HasSrc2FloatMods>.ret);
+ let HasExtVOP3DPP = 0;
+}
+
let SubtargetPredicate = isGFX9Plus in {
let isCommutable = 1, isReMaterializable = 1 in {
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -526,6 +550,43 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
let SubtargetPredicate = isGFX940Plus in
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
+let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
+ defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>;
+ defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>;
+ }
+
+ // These instructions use op_sel in a non-standard way: they use op_sel
+ // bits 2 and 3 while having only two sources. A dummy src2 operand is
+ // therefore used to hold the op_sel value.
+ let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in {
+ defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
+ defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
+ }
+}
+
+class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat<
+ (i32 (node f32:$src0, f32:$src1, i32:$old, index)),
+ (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0))
+>;
+
+class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat<
+ (i32 (node f32:$src0, i32:$src1, i32:$old, index)),
+ (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1,
+ !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0))
+>;
+
+foreach Index = [0, -1] in {
+ def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
+ def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
+}
+
+foreach Index = [0, 1, 2, 3] in {
+ def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
+ def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
+}
+
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
@@ -699,15 +760,19 @@ def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
}
class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> {
- // FIXME VOP3 DPP versions are unsupported
- let HasExtVOP3DPP = 0;
let HasClamp = 0;
let HasOMod = 0;
- let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
- NumSrcArgs, HasClamp, HasOMod,
- !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods),
- !if(isFloatType<Src1VT>.ret, FPVRegInputMods, IntOpSelMods),
- !if(isFloatType<Src2VT>.ret, FPVRegInputMods, IntOpSelMods)>.ret;
+ // Override modifiers for bf16 (stored as i16); they match the float modifiers.
+ let HasSrc0Mods = 1;
+ let HasSrc1Mods = 1;
+ let HasSrc2Mods = 1;
+ let Src0ModDPP = FPVRegInputMods;
+ let Src1ModDPP = FPVRegInputMods;
+ let Src2ModVOP3DPP = FPVRegInputMods;
+ let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+ HasClamp, HasOMod, FPVRegInputMods,
+ FPVRegInputMods, FPVRegInputMods>.ret;
+ let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, 1, 1, 1>.ret;
}
let SubtargetPredicate = isGFX11Plus in {
@@ -723,7 +788,7 @@ let SubtargetPredicate = isGFX11Plus in {
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
} // End SubtargetPredicate = isGFX11Plus
-let SubtargetPredicate = HasDot8Insts in {
+let SubtargetPredicate = HasDot8Insts, IsDOT=1 in {
defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>;
}
@@ -848,9 +913,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>;
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>;
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>;
defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>;
-// FIXME VOP3 DPP Dot instructions are unsupported
-defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>;
-defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>;
+defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11<0x266>;
+defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11<0x267>;
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
@@ -1161,6 +1225,13 @@ multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}
+multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> {
+ def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
+ let Inst{13} = src2_modifiers{2}; // op_sel(2)
+ }
+}
+
multiclass VOP3Interp_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
@@ -1352,3 +1423,10 @@ defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
+
+let OtherPredicates = [HasFP8Insts] in {
+defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
+defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
+defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
+defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
+}
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 59ce532af59b..f1ce613d613b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -493,6 +493,8 @@ def VOPProfileMAI_I32_I64_X16 : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, A
def VOPProfileMAI_I32_I64_X32 : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, AISrc_512_b32, ADst_512, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16 : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32 : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;
+def VOPProfileMAI_F32_I64_X32 : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
+def VOPProfileMAI_F32_I64_X16 : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;
def VOPProfileMAI_F32_F32_X4_VCD : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, VISrc_128_f32, VDst_128>;
def VOPProfileMAI_F32_F32_X16_VCD : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, VISrc_512_f32, VDst_512>;
@@ -515,6 +517,8 @@ def VOPProfileMAI_I32_I64_X16_VCD : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32,
def VOPProfileMAI_I32_I64_X32_VCD : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, VISrc_512_b32, VDst_512, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;
+def VOPProfileMAI_F32_I64_X32_VCD : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
+def VOPProfileMAI_F32_I64_X16_VCD : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;
def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
@@ -522,6 +526,8 @@ def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I
def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_16X16X64_F8 : VOPProfileSMFMAC<VOP_V4F32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
+def VOPProfileSMFMAC_F32_32X32X32_F8 : VOPProfileSMFMAC<VOP_V16F32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
class MFMATable <bit is_mac, string Name> {
bit IsMac = is_mac;
@@ -638,6 +644,14 @@ let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in {
defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
+ defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
+ defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>;
+ defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>;
+ defm V_MFMA_F32_16X16X32_FP8_FP8 : MAIInst<"v_mfma_f32_16x16x32_fp8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_fp8>;
+ defm V_MFMA_F32_32X32X16_BF8_BF8 : MAIInst<"v_mfma_f32_32x32x16_bf8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_bf8>;
+ defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
+ defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
+ defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
} // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1
multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
@@ -654,6 +668,14 @@ defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16",
defm V_SMFMAC_F32_32X32X16_BF16 : SMFMACInst<"v_smfmac_f32_32x32x16_bf16", "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>;
defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8", "I32_16X16X64_I8", int_amdgcn_smfmac_i32_16x16x64_i8>;
defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>;
+defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>;
+defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>;
+defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>;
+defm V_SMFMAC_F32_16X16X64_FP8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>;
+defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>;
+defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>;
+defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>;
+defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>;
}
def MAIInstInfoTable : GenericTable {
@@ -1121,6 +1143,14 @@ defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x
defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
+defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>;
+defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;
+defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>;
+defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>;
+defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>;
+defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>;
+defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>;
+defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>;
defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
@@ -1137,6 +1167,14 @@ defm V_SMFMAC_F32_16X16X32_BF16 : VOP3P_Real_SMFMAC <0x66, "v_smfmac_f32_16x1
defm V_SMFMAC_F32_32X32X16_BF16 : VOP3P_Real_SMFMAC <0x68, "v_smfmac_f32_32x32x16bf16">;
defm V_SMFMAC_I32_16X16X64_I8 : VOP3P_Real_SMFMAC <0x6a, "v_smfmac_i32_16x16x64i8">;
defm V_SMFMAC_I32_32X32X32_I8 : VOP3P_Real_SMFMAC <0x6c, "v_smfmac_i32_32x32x32i8">;
+defm V_SMFMAC_F32_16X16X64_BF8_BF8 : VOP3P_Real_SMFMAC <0x78, "v_smfmac_f32_16x16x64bf8bf8">;
+defm V_SMFMAC_F32_16X16X64_BF8_FP8 : VOP3P_Real_SMFMAC <0x79, "v_smfmac_f32_16x16x64bf8fp8">;
+defm V_SMFMAC_F32_16X16X64_FP8_BF8 : VOP3P_Real_SMFMAC <0x7a, "v_smfmac_f32_16x16x64fp8bf8">;
+defm V_SMFMAC_F32_16X16X64_FP8_FP8 : VOP3P_Real_SMFMAC <0x7b, "v_smfmac_f32_16x16x64fp8fp8">;
+defm V_SMFMAC_F32_32X32X32_BF8_BF8 : VOP3P_Real_SMFMAC <0x7c, "v_smfmac_f32_32x32x32bf8bf8">;
+defm V_SMFMAC_F32_32X32X32_BF8_FP8 : VOP3P_Real_SMFMAC <0x7d, "v_smfmac_f32_32x32x32bf8fp8">;
+defm V_SMFMAC_F32_32X32X32_FP8_BF8 : VOP3P_Real_SMFMAC <0x7e, "v_smfmac_f32_32x32x32fp8bf8">;
+defm V_SMFMAC_F32_32X32X32_FP8_FP8 : VOP3P_Real_SMFMAC <0x7f, "v_smfmac_f32_32x32x32fp8fp8">;
let SubtargetPredicate = HasPackedFP32Ops in {
defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 33d3441e94c2..d489a089ac78 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -59,15 +59,17 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
"$src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl");
let AsmDPP8 = "$src0, $src1 $dpp8$fi";
let AsmDPP16 = AsmDPP#"$fi";
+ // VOPC DPP instructions do not need an old operand.
+ let TieRegDPP = "";
let InsDPP = getInsDPP<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
- Src2ModDPP>.ret;
+ Src2ModDPP, 0/*HasOld*/>.ret;
let InsDPP16 = getInsDPP16<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
- Src2ModDPP>.ret;
+ Src2ModDPP, 0/*HasOld*/>.ret;
let InsDPP8 = getInsDPP8<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP,
- Src2ModDPP>.ret;
+ Src2ModDPP, 0/*HasOld*/>.ret;
// The destination for 32-bit encoding is implicit.
let HasDst32 = 0;
@@ -76,9 +78,9 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
let Outs64 = (outs VOPDstS64orS32:$sdst);
let OutsVOP3DPP = Outs64;
let OutsVOP3DPP8 = Outs64;
- let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret;
- let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret;
- let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret;
+ let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret;
+ let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret;
+ let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret;
list<SchedReadWrite> Schedule = sched;
}
@@ -293,7 +295,7 @@ multiclass VOPC_Pseudos <string opName,
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
let isCompare = 1;
- let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $sdst", "");
+ let Constraints = "";
}
} // end SubtargetPredicate = isGFX11Plus
@@ -711,7 +713,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
VOPC_Profile<sched, vt, i32> {
let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
let AsmDPP16 = AsmDPP#"$fi";
- let InsDPP = (ins VGPR_32:$old, FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP = (ins FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsDPP16 = !con(InsDPP, (ins FI:$fi));
// DPP8 forbids modifiers and can inherit from VOPC_Profile
@@ -793,7 +795,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
def _e64_dpp : VOP3_DPP_Pseudo<opName, p> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = p.Schedule;
- let Constraints = !if(p.NumSrcArgs, p.TieRegDPP # " = $sdst", "");
+ let Constraints = "";
}
} // end SubtargetPredicate = isGFX11Plus
}
@@ -1068,7 +1070,6 @@ class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
let Constraints = ps.Constraints;
- let AsmMatchConverter = "cvtVOPCNoDstDPP";
}
class VOPC_DPP16_SIMC<bits<8> op, VOP_DPP_Pseudo ps, int subtarget,
@@ -1084,7 +1085,6 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName>
let Uses = ps.Uses;
let OtherPredicates = ps.OtherPredicates;
let Constraints = "";
- let AsmMatchConverter = "cvtVOPCNoDstDPP8";
}
// VOPC64
@@ -1133,7 +1133,6 @@ class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps,
string opName = ps.OpName>
: VOPC64_DPP16<op, ps, opName> {
let Inst{7-0} = ? ;
- let AsmMatchConverter = "cvtVOPC64NoDstDPP";
}
class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P>
@@ -1163,13 +1162,12 @@ class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP8<op, ps, opName> {
bits<8> sdst;
let Inst{7-0} = sdst;
- let Constraints = "$old = $sdst";
+ let Constraints = "";
}
class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOPC64_DPP8<op, ps, opName> {
let Inst{7-0} = ? ;
- let AsmMatchConverter = "cvtVOPC64NoDstDPP8";
let Constraints = "";
}
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 187485ffa3ae..b65ca2d6b1b3 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -269,6 +269,10 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
class VOP3OpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>;
+class VOP3DotOpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11<op, p>{
+ let Inst{11} = ?;
+ let Inst{12} = ?;
+}
// NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
@@ -1270,6 +1274,8 @@ multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_f
class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP<op, opName, ps.Pfl, 1> {
+ let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
@@ -1285,6 +1291,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: VOP3_DPP8<op, opName, ps.Pfl> {
+ let VOP3_OPSEL = ps.Pfl.HasOpSel;
+ let IsDOT = ps.IsDOT;
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
@@ -1326,6 +1334,15 @@ let AssemblerPredicate = isGFX11Only,
VOP3e_gfx11<op, ps.Pfl>;
}
}
+ multiclass VOP3Dot_Real_Base_gfx11<bits<10> op, string opName = NAME,
+ bit isSingle = 0> {
+ defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
+ let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
+ def _e64_gfx11 :
+ VOP3_Real<ps, SIEncodingFamily.GFX11>,
+ VOP3DotOpSel_gfx11<op, ps.Pfl>;
+ }
+ }
multiclass VOP3_Real_with_name_gfx11<bits<10> op, string opName,
string asmName, bit isSingle = 0> {
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
@@ -1355,6 +1372,15 @@ let AssemblerPredicate = isGFX11Only,
let DecoderNamespace = "DPPGFX11";
}
}
+
+ multiclass VOP3Dot_Real_dpp_Base_gfx11<bits<10> op, string opName = NAME> {
+ def _e64_dpp_gfx11 : VOP3_DPP16<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> {
+ let Inst{11} = ?;
+ let Inst{12} = ?;
+ let DecoderNamespace = "DPPGFX11";
+ }
+ }
+
multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
@@ -1368,6 +1394,16 @@ let AssemblerPredicate = isGFX11Only,
let DecoderNamespace = "DPP8GFX11";
}
}
+
+ multiclass VOP3Dot_Real_dpp8_Base_gfx11<bits<10> op, string opName = NAME> {
+ defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ def _e64_dpp8_gfx11 : Base_VOP3_DPP8<op, ps> {
+ let Inst{11} = ?;
+ let Inst{12} = ?;
+ let DecoderNamespace = "DPP8GFX11";
+ }
+ }
+
multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName,
string asmName> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
@@ -1406,6 +1442,12 @@ multiclass VOP3_Realtriple_gfx11<bits<10> op,
VOP3_Real_dpp_Base_gfx11<op, opName>,
VOP3_Real_dpp8_Base_gfx11<op, opName>;
+multiclass VOP3Dot_Realtriple_gfx11<bits<10> op,
+ bit isSingle = 0, string opName = NAME> :
+ VOP3Dot_Real_Base_gfx11<op, opName, isSingle>,
+ VOP3Dot_Real_dpp_Base_gfx11<op, opName>,
+ VOP3Dot_Real_dpp8_Base_gfx11<op, opName>;
+
multiclass VOP3Only_Realtriple_gfx11<bits<10> op> :
VOP3_Realtriple_gfx11<op, 1>;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 80ba7b5f0d2e..183febe756c1 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -6726,8 +6726,8 @@ bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
return Subtarget.isMClass() && MF.getFunction().hasMinSize();
}
-bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const {
+bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
+ const MachineInstr &MI) const {
// Try hard to rematerialize any VCTPs because if we spill P0, it will block
// the tail predication conversion. This means that the element count
// register has to be live for longer, but that has to be better than
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 3b8f3403e3c3..453e3fa1b99b 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -480,8 +480,7 @@ private:
MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const;
- bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 613904f702f0..e5347ed8e53a 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1720,6 +1720,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
unsigned UxtOp,
MachineBasicBlock::iterator &NextMBBI) {
bool IsThumb = STI->isThumb();
+ bool IsThumb1Only = STI->isThumb1Only();
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
const MachineOperand &Dest = MI.getOperand(0);
@@ -1794,7 +1795,8 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
MIB.add(predOps(ARMCC::AL));
- unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
+ unsigned CMPri =
+ IsThumb ? (IsThumb1Only ? ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri;
BuildMI(StoreBB, DL, TII->get(CMPri))
.addReg(TempReg, RegState::Kill)
.addImm(0)
@@ -1848,6 +1850,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
bool IsThumb = STI->isThumb();
+ assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!");
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
@@ -3044,6 +3047,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
assert(STI->isThumb());
return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
NextMBBI);
+ case ARM::tCMP_SWAP_32:
+ assert(STI->isThumb());
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, NextMBBI);
case ARM::CMP_SWAP_8:
assert(!STI->isThumb());
@@ -3054,11 +3060,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
NextMBBI);
case ARM::CMP_SWAP_32:
- if (STI->isThumb())
- return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
- NextMBBI);
- else
- return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
+ assert(!STI->isThumb());
+ return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
case ARM::CMP_SWAP_64:
return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
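
The ARM changes above let the cmpxchg pseudos expand on Thumb1-only cores: ISel now picks the new tCMP_SWAP_32 pseudo for all Thumb targets, and the compare inside the expanded load/store-exclusive loop uses tCMPi8 instead of the Thumb2-only t2CMPri when the subtarget is Thumb1-only. A minimal C++ source example that exercises this path; the exact target triple is an illustrative assumption:

#include <atomic>

// Building this for a Thumb1-only target (for example
// --target=thumbv8m.base-none-eabi) now goes through tCMP_SWAP_32 and
// emits a tCMPi8 in the expanded exclusive-load/store loop.
bool try_update(std::atomic<int> &counter, int expected, int desired) {
  return counter.compare_exchange_strong(expected, desired);
}
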
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index e0e4ffd90e0e..afe16a3cd55c 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3131,7 +3131,7 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
// Else v8i16 pattern of an extract and an insert, with a optional vmovx for
// extracting odd lanes.
- if (VT == MVT::v8i16) {
+ if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
SDValue Inp1 = CurDAG->getTargetExtractSubreg(
ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
SDValue Inp2 = CurDAG->getTargetExtractSubreg(
@@ -3151,7 +3151,7 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
// The inserted values are not extracted - if they are f16 then insert them
// directly using a VINS.
- if (VT == MVT::v8f16) {
+ if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
SDValue NewIns =
CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
@@ -3512,7 +3512,7 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
else if (MemTy == MVT::i16)
Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
else if (MemTy == MVT::i32)
- Opcode = ARM::CMP_SWAP_32;
+ Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
else
llvm_unreachable("Unknown AtomicCmpSwap type");
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e6be93e6480a..743cca9ff71f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13572,6 +13572,10 @@ static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
bool
ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
if (Level == BeforeLegalizeTypes)
return true;
@@ -13605,8 +13609,38 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
return false;
}
+bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
+ const SDNode *N) const {
+ assert(N->getOpcode() == ISD::XOR &&
+ (N->getOperand(0).getOpcode() == ISD::SHL ||
+ N->getOperand(0).getOpcode() == ISD::SRL) &&
+ "Expected XOR(SHIFT) pattern");
+
+ // Only commute if the entire NOT mask is a hidden shifted mask.
+ auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
+ if (XorC && ShiftC) {
+ unsigned MaskIdx, MaskLen;
+ if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
+ unsigned ShiftAmt = ShiftC->getZExtValue();
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
+ return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
+ }
+ }
+
+ return false;
+}
+
bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
+ assert(((N->getOpcode() == ISD::SHL &&
+ N->getOperand(0).getOpcode() == ISD::SRL) ||
+ (N->getOpcode() == ISD::SRL &&
+ N->getOperand(0).getOpcode() == ISD::SHL)) &&
+ "Expected shift-shift mask");
+
if (!Subtarget->isThumb1Only())
return true;
@@ -19962,6 +19996,14 @@ bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
}
break;
}
+ case ARMISD::VBICIMM: {
+ SDValue Op0 = Op.getOperand(0);
+ unsigned ModImm = Op.getConstantOperandVal(1);
+ unsigned EltBits = 0;
+ uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
+ if ((OriginalDemandedBits & Mask) == 0)
+ return TLO.CombineTo(Op, Op0);
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
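
The new isDesirableToCommuteXorWithShift hook only agrees to commute xor(shift X, C) with the shift when the xor constant is a contiguous shifted mask that lines up with the shift amount. A standalone sketch of that predicate, using plain integers and GCC/Clang builtins rather than the APInt API, with illustrative names:

#include <cassert>
#include <cstdint>

// Returns true when Mask is a contiguous run of ones (a shifted mask),
// reporting the index of its lowest set bit and the run length.
static bool isShiftedMask(uint64_t Mask, unsigned &Idx, unsigned &Len) {
  if (Mask == 0)
    return false;
  Idx = __builtin_ctzll(Mask);
  Len = 64 - Idx - __builtin_clzll(Mask);
  // After shifting the lowest one down to bit 0, a contiguous run looks
  // like 2^Len - 1.
  return (((Mask >> Idx) + 1) & (Mask >> Idx)) == 0;
}

// Mirrors the shl case of ARMTargetLowering::isDesirableToCommuteXorWithShift:
// for xor((shl X, ShAmt), XorC) the NOT mask must cover exactly the bits the
// shift produced, i.e. start at ShAmt and run to the top of the type.
static bool desirableForShl(uint64_t XorC, unsigned ShAmt, unsigned BitWidth) {
  unsigned Idx, Len;
  return isShiftedMask(XorC, Idx, Len) && Idx == ShAmt &&
         Len == BitWidth - ShAmt;
}

int main() {
  // 0xFFFFFFF0 starts at bit 4 and reaches bit 31 of an i32, so commuting
  // xor((x << 4), 0xFFFFFFF0) is allowed.
  assert(desirableForShl(0xFFFFFFF0ULL, 4, 32));
  // 0x0000FF00 stops short of the top bit, so it is rejected.
  assert(!desirableForShl(0x0000FF00ULL, 4, 32));
  return 0;
}

For the srl form the same hook requires the mask to start at bit 0 instead (MaskIdx == 0 in the code above).
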
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 10f60ab93ae3..fae279ea7569 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -733,6 +733,8 @@ class VectorType;
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
+ bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
+
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 15c33014e988..9c03f72fe6ae 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1882,6 +1882,7 @@ let Predicates = [HasMVEInt] in {
def : Pat<(ARMvgetlaneu (v8f16 MQPR:$src), imm:$lane),
(MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
// For i16 inserts whose value is extracted from a low (even) lane, we may use VINS.
+ let Predicates = [HasFullFP16] in {
def : Pat<(ARMinsertelt (v8i16 MQPR:$src1),
(ARMvgetlaneu (v8i16 MQPR:$src2), imm_even:$extlane),
imm_odd:$inslane),
@@ -1889,6 +1890,7 @@ let Predicates = [HasMVEInt] in {
(VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$inslane)),
(EXTRACT_SUBREG MQPR:$src2, (SSubReg_f16_reg imm_even:$extlane))),
(SSubReg_f16_reg imm_odd:$inslane)), MQPR)>;
+ }
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
(MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
@@ -1905,17 +1907,21 @@ let Predicates = [HasMVEInt] in {
def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_even:$lane),
(MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS (f16 HPR:$src2), rGPR), imm:$lane)>;
+ let Predicates = [HasFullFP16] in {
def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_odd:$lane),
(COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)),
(VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$lane)),
(COPY_TO_REGCLASS HPR:$src2, SPR)),
(SSubReg_f16_reg imm_odd:$lane)), MQPR)>;
+ }
def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane),
(EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>;
+ let Predicates = [HasFullFP16] in {
def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane),
(COPY_TO_REGCLASS
(VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))),
HPR)>;
+ }
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index 71527ae1ab11..8f7039a327b3 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1782,11 +1782,15 @@ def tLDRConstPool
let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
mayLoad = 1, mayStore = 1 in {
-def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
+def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, tGPR:$temp),
(ins GPR:$addr, tGPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
-def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
+def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, tGPR:$temp),
(ins GPR:$addr, tGPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
+
+def tCMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, tGPR:$temp),
+ (ins GPR:$addr, GPR:$desired, GPR:$new),
+ NoItinerary, []>, Sched<[]>;
}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index ba1d806c8d81..3c102463ba08 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -20,8 +20,8 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -33,6 +33,7 @@
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -2197,12 +2198,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
return true;
}
-bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
- ScalarEvolution &SE,
- AssumptionCache &AC,
- TargetLibraryInfo *TLI,
- DominatorTree *DT,
- const LoopAccessInfo *LAI) {
+bool ARMTTIImpl::preferPredicateOverEpilogue(
+ Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
+ TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) {
if (!EnableTailPredication) {
LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
return false;
@@ -2244,7 +2242,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
return false;
}
- return canTailPredicateLoop(L, LI, SE, DL, LAI);
+ return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI());
}
PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const {
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index dcf82e703a7f..9c3980d79e60 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -288,12 +288,10 @@ public:
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo);
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
- ScalarEvolution &SE,
- AssumptionCache &AC,
- TargetLibraryInfo *TLI,
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
- const LoopAccessInfo *LAI);
+ LoopVectorizationLegality *LVL);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
index 2325193bac0a..3dd71243387b 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -92,15 +92,15 @@ public:
}
/// Get I/O register addresses.
- int getIORegRAMPZ(void) const { return hasELPM() ? 0x3b : -1; }
- int getIORegEIND(void) const { return hasEIJMPCALL() ? 0x3c : -1; }
- int getIORegSPL(void) const { return 0x3d; }
- int getIORegSPH(void) const { return hasSmallStack() ? -1 : 0x3e; }
- int getIORegSREG(void) const { return 0x3f; }
+ int getIORegRAMPZ() const { return hasELPM() ? 0x3b : -1; }
+ int getIORegEIND() const { return hasEIJMPCALL() ? 0x3c : -1; }
+ int getIORegSPL() const { return 0x3d; }
+ int getIORegSPH() const { return hasSmallStack() ? -1 : 0x3e; }
+ int getIORegSREG() const { return 0x3f; }
/// Get GPR aliases.
- int getRegTmpIndex(void) const { return hasTinyEncoding() ? 16 : 0; }
- int getRegZeroIndex(void) const { return hasTinyEncoding() ? 17 : 1; }
+ int getRegTmpIndex() const { return hasTinyEncoding() ? 16 : 0; }
+ int getRegZeroIndex() const { return hasTinyEncoding() ? 17 : 1; }
private:
/// The ELF e_flags architecture.
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index d490b385ac16..0bf739452fd2 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -518,7 +518,7 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Opcode = 0;
if (CSKY::GPRRegClass.contains(DestReg, SrcReg))
- Opcode = CSKY::MOV32;
+ Opcode = STI.hasE2() ? CSKY::MOV32 : CSKY::MOV16;
else if (v2sf && CSKY::sFPR32RegClass.contains(DestReg, SrcReg))
Opcode = CSKY::FMOV_S;
else if (v3sf && CSKY::FPR32RegClass.contains(DestReg, SrcReg))
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 3e09270a66d0..869433613620 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -13,6 +13,7 @@
#include "DXILBitcodeWriter.h"
#include "DXILValueEnumerator.h"
#include "PointerTypeAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeCommon.h"
#include "llvm/Bitcode/BitcodeReader.h"
@@ -2580,10 +2581,9 @@ void DXILBitcodeWriter::writeFunctionLevelValueSymbolTable(
SortedTable.push_back(VI.second->getValueName());
}
// The keys are unique, so there shouldn't be stability issues.
- std::sort(SortedTable.begin(), SortedTable.end(),
- [](const ValueName *A, const ValueName *B) {
- return A->first() < B->first();
- });
+ llvm::sort(SortedTable, [](const ValueName *A, const ValueName *B) {
+ return A->first() < B->first();
+ });
for (const ValueName *SI : SortedTable) {
auto &Name = *SI;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
index 08944ee3f1fe..e2a41515de38 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp
@@ -809,7 +809,7 @@ void ValueEnumerator::organizeMetadata() {
// - by function, then
// - by isa<MDString>
// and then sort by the original/current ID. Since the IDs are guaranteed to
- // be unique, the result of std::sort will be deterministic. There's no need
+ // be unique, the result of llvm::sort will be deterministic. There's no need
// for std::stable_sort.
llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index abd84a188cfa..bd0232c71d48 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -85,7 +85,6 @@ public:
int getAllocSizeOf(const Type *Ty) const;
int getTypeAlignment(Type *Ty) const;
- VectorType *getByteVectorTy(int ScLen) const;
Constant *getNullValue(Type *Ty) const;
Constant *getFullValue(Type *Ty) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4acf90bd9788..93c8864347bb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -217,9 +217,8 @@ SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
- // FIXME: Only support PC-relative addressing to access the symbol.
- // TODO: Add target flags.
- if (!isPositionIndependent()) {
+ // TODO: Support dso_preemptable and target flags.
+ if (GV->isDSOLocal()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty);
SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0);
SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 468c4f43cb90..2d08d5c674bc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -38,9 +38,7 @@ static std::string computeDataLayout(const Triple &TT) {
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::Static;
- return *RM;
+ return RM.value_or(Reloc::Static);
}
LoongArchTargetMachine::LoongArchTargetMachine(
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index b98be4ae4b75..4dfc16526a00 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1192,6 +1192,12 @@ bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
bool MipsTargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
+ assert(((N->getOpcode() == ISD::SHL &&
+ N->getOperand(0).getOpcode() == ISD::SRL) ||
+ (N->getOpcode() == ISD::SRL &&
+ N->getOperand(0).getOpcode() == ISD::SHL)) &&
+ "Expected shift-shift mask");
+
if (N->getOperand(0).getValueType().isVector())
return false;
return true;
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9977d8ba0300..45e82e935772 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -73,8 +73,10 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -354,8 +356,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
// PTX ABI requires all scalar return values to be at least 32
// bits in size. fp16 normally uses .b16 as its storage type in
// PTX, so its size must be adjusted here, too.
- if (size < 32)
- size = 32;
+ size = promoteScalarArgumentSize(size);
O << ".param .b" << size << " func_retval0";
} else if (isa<PointerType>(Ty)) {
@@ -384,8 +385,8 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32))
- sz = 32;
+ if (elemtype.isInteger())
+ sz = promoteScalarArgumentSize(sz);
O << ".reg .b" << sz << " func_retval" << idx;
if (j < je - 1)
O << ", ";
@@ -1168,31 +1169,37 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->hasInitializer()) {
const Constant *Initializer = GVar->getInitializer();
if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
- AggBuffer aggBuffer(ElementSize, O, *this);
+ AggBuffer aggBuffer(ElementSize, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
- if (aggBuffer.numSymbols) {
- if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
- O << " .u64 ";
+ if (aggBuffer.numSymbols()) {
+ unsigned int ptrSize = MAI->getCodePointerSize();
+ if (ElementSize % ptrSize ||
+ !aggBuffer.allSymbolsAligned(ptrSize)) {
+ // Print in bytes and use the mask() operator for pointers.
+ if (!STI.hasMaskOperator())
+ report_fatal_error(
+ "initialized packed aggregate with pointers '" +
+ GVar->getName() +
+ "' requires at least PTX ISA version 7.1");
+ O << " .u8 ";
getSymbol(GVar)->print(O, MAI);
- O << "[";
- O << ElementSize / 8;
+ O << "[" << ElementSize << "] = {";
+ aggBuffer.printBytes(O);
+ O << "}";
} else {
- O << " .u32 ";
+ O << " .u" << ptrSize * 8 << " ";
getSymbol(GVar)->print(O, MAI);
- O << "[";
- O << ElementSize / 4;
+ O << "[" << ElementSize / ptrSize << "] = {";
+ aggBuffer.printWords(O);
+ O << "}";
}
- O << "]";
} else {
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
- O << "[";
- O << ElementSize;
- O << "]";
+ O << "[" << ElementSize << "] = {";
+ aggBuffer.printBytes(O);
+ O << "}";
}
- O << " = {";
- aggBuffer.print();
- O << "}";
} else {
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
@@ -1219,6 +1226,80 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << ";\n";
}
+void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) {
+ const Value *v = Symbols[nSym];
+ const Value *v0 = SymbolsBeforeStripping[nSym];
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ MCSymbol *Name = AP.getSymbol(GVar);
+ PointerType *PTy = dyn_cast<PointerType>(v0->getType());
+ // Is v0 a generic pointer?
+ bool isGenericPointer = PTy && PTy->getAddressSpace() == 0;
+ if (EmitGeneric && isGenericPointer && !isa<Function>(v)) {
+ os << "generic(";
+ Name->print(os, AP.MAI);
+ os << ")";
+ } else {
+ Name->print(os, AP.MAI);
+ }
+ } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
+ const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false);
+ AP.printMCExpr(*Expr, os);
+ } else
+ llvm_unreachable("symbol type unknown");
+}
+
+void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) {
+ unsigned int ptrSize = AP.MAI->getCodePointerSize();
+ symbolPosInBuffer.push_back(size);
+ unsigned int nSym = 0;
+ unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
+ for (unsigned int pos = 0; pos < size;) {
+ if (pos)
+ os << ", ";
+ if (pos != nextSymbolPos) {
+ os << (unsigned int)buffer[pos];
+ ++pos;
+ continue;
+ }
+ // Generate a per-byte mask() operator for the symbol, which looks like:
+ // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...};
+ // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers
+ std::string symText;
+ llvm::raw_string_ostream oss(symText);
+ printSymbol(nSym, oss);
+ for (unsigned i = 0; i < ptrSize; ++i) {
+ if (i)
+ os << ", ";
+ llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper);
+ os << "(" << symText << ")";
+ }
+ pos += ptrSize;
+ nextSymbolPos = symbolPosInBuffer[++nSym];
+ assert(nextSymbolPos >= pos);
+ }
+}
+
+void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) {
+ unsigned int ptrSize = AP.MAI->getCodePointerSize();
+ symbolPosInBuffer.push_back(size);
+ unsigned int nSym = 0;
+ unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
+ assert(nextSymbolPos % ptrSize == 0);
+ for (unsigned int pos = 0; pos < size; pos += ptrSize) {
+ if (pos)
+ os << ", ";
+ if (pos == nextSymbolPos) {
+ printSymbol(nSym, os);
+ nextSymbolPos = symbolPosInBuffer[++nSym];
+ assert(nextSymbolPos % ptrSize == 0);
+ assert(nextSymbolPos >= pos + ptrSize);
+ } else if (ptrSize == 4)
+ os << support::endian::read32le(&buffer[pos]);
+ else
+ os << support::endian::read64le(&buffer[pos]);
+ }
+}
+
void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
if (localDecls.find(f) == localDecls.end())
return;
@@ -1494,8 +1575,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32)
- sz = 32;
+ sz = promoteScalarArgumentSize(sz);
} else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else if (Ty->isHalfTy())
@@ -1559,8 +1639,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32))
- sz = 32;
+ if (elemtype.isInteger())
+ sz = promoteScalarArgumentSize(sz);
O << "\t.reg .b" << sz << " ";
printParamName(I, paramIndex, O);
if (j < je - 1)
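
AggBuffer::printBytes represents a pointer inside a byte-wise initializer by expanding it into one mask() term per byte, e.g. .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), ...}; printWords is still used when the element size is a multiple of the pointer size and every symbol is pointer-aligned. A small standalone sketch of the per-byte expansion, assuming an 8-byte pointer; the helper name is made up for illustration:

#include <cstdio>
#include <string>

// Print the PTX mask() terms that stand in for an 8-byte pointer to `sym`
// inside a .u8 initializer, mirroring AggBuffer::printBytes.
static std::string maskedPointerBytes(const std::string &sym) {
  std::string out;
  for (unsigned i = 0; i < 8; ++i) {
    if (i)
      out += ", ";
    char buf[32];
    std::snprintf(buf, sizeof(buf), "0x%llX", 0xFFULL << (i * 8));
    out += buf;
    out += "(" + sym + ")";
  }
  return out;
}

int main() {
  // Produces: 0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ... up to byte 7.
  std::printf(".global .u8 addr[8] = {%s};\n",
              maskedPointerBytes("foo").c_str());
  return 0;
}
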
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index cd61e99a103a..710c089e3325 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -61,24 +61,30 @@ class MCOperand;
class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
class AggBuffer {
- // Used to buffer the emitted string for initializing global
- // aggregates.
+ // Used to buffer the emitted string for initializing global aggregates.
//
- // Normally an aggregate (array, vector or structure) is emitted
- // as a u8[]. However, if one element/field of the aggregate
- // is a non-NULL address, then the aggregate is emitted as u32[]
- // or u64[].
+ // Normally an aggregate (array, vector, or structure) is emitted as a u8[].
+  // However, if any element/field of the aggregate is a non-NULL address,
+ // and all such addresses are properly aligned, then the aggregate is
+ // emitted as u32[] or u64[]. In the case of unaligned addresses, the
+ // aggregate is emitted as u8[], and the mask() operator is used for all
+ // pointers.
//
- // We first layout the aggregate in 'buffer' in bytes, except for
- // those symbol addresses. For the i-th symbol address in the
- //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
- // are filled with 0s. symbolPosInBuffer[i-1] records its position
- // in 'buffer', and Symbols[i-1] records the Value*.
+ // We first layout the aggregate in 'buffer' in bytes, except for those
+ // symbol addresses. For the i-th symbol address in the aggregate, its
+ // corresponding 4-byte or 8-byte elements in 'buffer' are filled with 0s.
+ // symbolPosInBuffer[i-1] records its position in 'buffer', and Symbols[i-1]
+ // records the Value*.
//
- // Once we have this AggBuffer setup, we can choose how to print
- // it out.
+ // Once we have this AggBuffer setup, we can choose how to print it out.
public:
- unsigned numSymbols; // number of symbol addresses
+ // number of symbol addresses
+ unsigned numSymbols() const { return Symbols.size(); }
+
+ bool allSymbolsAligned(unsigned ptrSize) const {
+ return llvm::all_of(symbolPosInBuffer,
+ [=](unsigned pos) { return pos % ptrSize == 0; });
+ }
private:
const unsigned size; // size of the buffer in bytes
@@ -94,15 +100,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
// SymbolsBeforeStripping[i].
SmallVector<const Value *, 4> SymbolsBeforeStripping;
unsigned curpos;
- raw_ostream &O;
NVPTXAsmPrinter &AP;
bool EmitGeneric;
public:
- AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
- : size(size), buffer(size), O(O), AP(AP) {
+ AggBuffer(unsigned size, NVPTXAsmPrinter &AP)
+ : size(size), buffer(size), AP(AP) {
curpos = 0;
- numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
}
@@ -135,63 +139,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
symbolPosInBuffer.push_back(curpos);
Symbols.push_back(GVar);
SymbolsBeforeStripping.push_back(GVarBeforeStripping);
- numSymbols++;
}
- void print() {
- if (numSymbols == 0) {
- // print out in bytes
- for (unsigned i = 0; i < size; i++) {
- if (i)
- O << ", ";
- O << (unsigned int) buffer[i];
- }
- } else {
- // print out in 4-bytes or 8-bytes
- unsigned int pos = 0;
- unsigned int nSym = 0;
- unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
- unsigned int nBytes = 4;
- if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
- nBytes = 8;
- for (pos = 0; pos < size; pos += nBytes) {
- if (pos)
- O << ", ";
- if (pos == nextSymbolPos) {
- const Value *v = Symbols[nSym];
- const Value *v0 = SymbolsBeforeStripping[nSym];
- if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- MCSymbol *Name = AP.getSymbol(GVar);
- PointerType *PTy = dyn_cast<PointerType>(v0->getType());
- bool IsNonGenericPointer = false; // Is v0 a non-generic pointer?
- if (PTy && PTy->getAddressSpace() != 0) {
- IsNonGenericPointer = true;
- }
- if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
- O << "generic(";
- Name->print(O, AP.MAI);
- O << ")";
- } else {
- Name->print(O, AP.MAI);
- }
- } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
- const MCExpr *Expr =
- AP.lowerConstantForGV(cast<Constant>(CExpr), false);
- AP.printMCExpr(*Expr, O);
- } else
- llvm_unreachable("symbol type unknown");
- nSym++;
- if (nSym >= numSymbols)
- nextSymbolPos = size + 1;
- else
- nextSymbolPos = symbolPosInBuffer[nSym];
- } else if (nBytes == 4)
- O << *(unsigned int *)(&buffer[pos]);
- else
- O << *(unsigned long long *)(&buffer[pos]);
- }
- }
- }
+ void printBytes(raw_ostream &os);
+ void printWords(raw_ostream &os);
+
+ private:
+ void printSymbol(unsigned nSym, raw_ostream &os);
};
friend class AggBuffer;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 6ad016dfa0a7..8264032b765a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -206,6 +206,40 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
}
}
+/// PromoteScalarIntegerPTX
+/// Used to make sure the arguments/returns are suitable for passing
+/// and promote them to a larger size if they're not.
+///
+/// The promoted type is placed in \p PromotedVT if the function returns true.
+static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) {
+ if (VT.isScalarInteger()) {
+ switch (PowerOf2Ceil(VT.getFixedSizeInBits())) {
+ default:
+ llvm_unreachable(
+ "Promotion is not suitable for scalars of size larger than 64-bits");
+ case 1:
+ *PromotedVT = MVT::i1;
+ break;
+ case 2:
+ case 4:
+ case 8:
+ *PromotedVT = MVT::i8;
+ break;
+ case 16:
+ *PromotedVT = MVT::i16;
+ break;
+ case 32:
+ *PromotedVT = MVT::i32;
+ break;
+ case 64:
+ *PromotedVT = MVT::i64;
+ break;
+ }
+ return EVT(*PromotedVT) != VT;
+ }
+ return false;
+}
+
// Check whether we can merge loads/stores of some of the pieces of a
// flattened function parameter or return value into a single vector
// load/store.
@@ -1291,8 +1325,7 @@ std::string NVPTXTargetLowering::getPrototype(
// PTX ABI requires all scalar return values to be at least 32
// bits in size. fp16 normally uses .b16 as its storage type in
// PTX, so its size must be adjusted here, too.
- if (size < 32)
- size = 32;
+ size = promoteScalarArgumentSize(size);
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
@@ -1343,8 +1376,7 @@ std::string NVPTXTargetLowering::getPrototype(
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32)
- sz = 32;
+ sz = promoteScalarArgumentSize(sz);
} else if (isa<PointerType>(Ty)) {
sz = PtrVT.getSizeInBits();
} else if (Ty->isHalfTy())
@@ -1515,11 +1547,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
NeedAlign = true;
} else {
// declare .param .b<size> .param<n>;
- if ((VT.isInteger() || VT.isFloatingPoint()) && TypeSize < 4) {
+ if (VT.isInteger() || VT.isFloatingPoint()) {
// PTX ABI requires integral types to be at least 32 bits in
// size. FP16 is loaded/stored using i16, so it's handled
// here as well.
- TypeSize = 4;
+ TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8;
}
SDValue DeclareScalarParamOps[] = {
Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
@@ -1556,6 +1588,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDValue StVal = OutVals[OIdx];
+
+ MVT PromotedVT;
+ if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
+ EltVT = EVT(PromotedVT);
+ }
+ if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) {
+ llvm::ISD::NodeType Ext =
+ Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ StVal = DAG.getNode(Ext, dl, PromotedVT, StVal);
+ }
+
if (IsByVal) {
auto PtrVT = getPointerTy(DL);
SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal,
@@ -1638,9 +1681,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Plus, this behavior is consistent with nvcc's.
if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() ||
(RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) {
- // Scalar needs to be at least 32bit wide
- if (resultsz < 32)
- resultsz = 32;
+ resultsz = promoteScalarArgumentSize(resultsz);
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
DAG.getConstant(resultsz, dl, MVT::i32),
@@ -1778,6 +1819,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT TheLoadType = VTs[i];
EVT EltType = Ins[i].VT;
Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
+ MVT PromotedVT;
+
+ if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) {
+ TheLoadType = EVT(PromotedVT);
+ EltType = EVT(PromotedVT);
+ needTruncate = true;
+ }
+
if (ExtendIntegerRetVal) {
TheLoadType = MVT::i32;
EltType = MVT::i32;
@@ -2558,6 +2607,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// v2f16 was loaded as an i32. Now we must bitcast it back.
else if (EltVT == MVT::v2f16)
Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
+
+ // If a promoted integer type is used, truncate down to the original
+ MVT PromotedVT;
+ if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) {
+ Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
+ }
+
// Extend the element if necessary (e.g. an i8 is loaded
// into an i16 register)
if (Ins[InsIdx].VT.isInteger() &&
@@ -2627,11 +2683,26 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return Chain;
const DataLayout &DL = DAG.getDataLayout();
+ SmallVector<SDValue, 16> PromotedOutVals;
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ SDValue PromotedOutVal = OutVals[i];
+ MVT PromotedVT;
+ if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) {
+ VTs[i] = EVT(PromotedVT);
+ }
+ if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) {
+ llvm::ISD::NodeType Ext =
+ Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal);
+ }
+ PromotedOutVals.push_back(PromotedOutVal);
+ }
+
auto VectorInfo = VectorizePTXValueVTs(
VTs, Offsets,
RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL)
@@ -2652,12 +2723,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
}
- SDValue RetVal = OutVals[i];
+ SDValue OutVal = OutVals[i];
+ SDValue RetVal = PromotedOutVals[i];
+
if (ExtendIntegerRetVal) {
RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND,
dl, MVT::i32, RetVal);
- } else if (RetVal.getValueSizeInBits() < 16) {
+ } else if (OutVal.getValueSizeInBits() < 16) {
// Use 16-bit registers for small load-stores as it's the
// smallest general purpose register size supported by NVPTX.
RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
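
PromoteScalarIntegerPTX rounds an odd-sized scalar integer up to the nearest width PTX can actually pass (i1, i8, i16, i32 or i64); LowerCall and LowerReturn then sign- or zero-extend into the promoted type, and LowerFormalArguments truncates back. A rough standalone sketch of just the width mapping, using plain integers instead of MVTs, so it is only an approximation of the real helper:

#include <cassert>

// Round a scalar integer bit-width up to the width PTX can pass, following
// the switch in PromoteScalarIntegerPTX.
static unsigned promotedBits(unsigned Bits) {
  // Next power of two of Bits.
  unsigned P = 1;
  while (P < Bits)
    P *= 2;
  if (P == 1)
    return 1;            // i1 stays i1
  if (P <= 8)
    return 8;            // i2..i8 -> i8
  if (P == 16 || P == 32 || P == 64)
    return P;
  return 0;              // >64 bits is not handled on this path
}

int main() {
  assert(promotedBits(3) == 8);   // i3  -> i8
  assert(promotedBits(24) == 32); // i24 -> i32
  assert(promotedBits(40) == 64); // i40 -> i64
  return 0;
}
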
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 9a249d3da3d5..cea3dce3f1c5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -77,6 +77,7 @@ public:
bool hasImageHandles() const;
bool hasFP16Math() const { return SmVersion >= 53; }
bool allowFP16Math() const;
+ bool hasMaskOperator() const { return PTXVersion >= 71; }
unsigned int getSmVersion() const { return SmVersion; }
std::string getTargetName() const { return TargetName; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index bf1524194cfb..6fee57b4664e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -59,6 +59,16 @@ bool isKernelFunction(const Function &);
bool getAlign(const Function &, unsigned index, unsigned &);
bool getAlign(const CallInst &, unsigned index, unsigned &);
+// The PTX ABI requires all scalar argument/return values to have a
+// bit-size that is a power of two and at least 32 bits.
+inline unsigned promoteScalarArgumentSize(unsigned size) {
+ if (size <= 32)
+ return 32;
+ else if (size <= 64)
+ return 64;
+ else
+ return size;
+}
}
#endif
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 4247cf557c2a..14c4fd3a9ffa 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -5473,7 +5473,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::MUL: {
SDValue Op1 = N->getOperand(1);
- if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
+ if (Op1.getOpcode() != ISD::Constant ||
+ (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
break;
// If the multiplier fits int16, we can handle it with mulli.
@@ -5486,13 +5487,27 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
// DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
uint64_t ImmSh = Imm >> Shift;
- if (isInt<16>(ImmSh)) {
- uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
+ if (!isInt<16>(ImmSh))
+ break;
+
+ uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
+ if (Op1.getValueType() == MVT::i64) {
SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
N->getOperand(0), SDImm);
- CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
- getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
+
+ SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
+ getI32Imm(63 - Shift, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return;
+ } else {
+ SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
+ SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
+ N->getOperand(0), SDImm);
+
+ SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
+ getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return;
}
break;
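
The ISD::MUL selection above only fires when the immediate multiplier can be written as c1 << c2 with c1 fitting a signed 16-bit immediate, so the multiply becomes mulli followed by a single shift (RLDICR for i64, and now RLWINM for the new i32 path). A sketch of that decomposition with plain integers and illustrative names:

#include <cassert>
#include <cstdint>

// Split Imm into (ImmSh << Shift) with ImmSh representable as a signed
// 16-bit immediate, the form PPCDAGToDAGISel::Select handles with
// mulli + rldicr/rlwinm. Returns false if no such split exists.
static bool splitMulImm(uint64_t Imm, int64_t &ImmSh, unsigned &Shift) {
  if (Imm == 0)
    return false;
  Shift = __builtin_ctzll(Imm);
  ImmSh = static_cast<int64_t>(Imm >> Shift);
  return ImmSh >= INT16_MIN && ImmSh <= INT16_MAX;
}

int main() {
  int64_t C;
  unsigned Sh;
  // 0x50000 = 5 << 16: becomes (rldicr/rlwinm (mulli X, 5), 16, ...).
  assert(splitMulImm(0x50000, C, Sh) && C == 5 && Sh == 16);
  // 0x123450000 shifts down to 0x12345, which does not fit simm16, so the
  // generic multiply lowering is used instead.
  assert(!splitMulImm(0x123450000ULL, C, Sh));
  return 0;
}
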
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 59486c323567..c85f57f04c7d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1086,8 +1086,8 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
// For opcodes with the ReMaterializable flag set, this function is called to
// verify the instruction is really rematable.
-bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AliasAnalysis *AA) const {
+bool PPCInstrInfo::isReallyTriviallyReMaterializable(
+ const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
// This function should only be called for opcodes with the ReMaterializable
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index e22b0086bde8..980bb3107a8b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -495,8 +495,7 @@ public:
unsigned &SubIdx) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
- bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 4689c0638ca6..23703ac54d0e 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -568,7 +568,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
// Make sure the base is able to expand.
- if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
+ if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart()))
return MadeChange;
assert(BasePtrSCEV->isAffine() &&
@@ -602,7 +602,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
// Make sure offset is able to expand. Only need to check one time as the
// offsets are reused between different chains.
if (!BaseElemIdx)
- if (!isSafeToExpand(OffsetSCEV, *SE))
+ if (!SCEVE.isSafeToExpand(OffsetSCEV))
return false;
Value *OffsetValue = SCEVE.expandCodeFor(
@@ -1018,14 +1018,13 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(
if (!BasePtrSCEV->isAffine())
return MadeChange;
- if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
- return MadeChange;
-
- SmallPtrSet<Value *, 16> DeletedPtrs;
-
BasicBlock *Header = L->getHeader();
SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
"loopprepare-formrewrite");
+ if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart()))
+ return MadeChange;
+
+ SmallPtrSet<Value *, 16> DeletedPtrs;
// For some DS form load/store instructions, it can also be an update form,
// if the stride is constant and is a multiple of 4. Use update form if
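
The prep pass now queries the SCEVExpander it already constructs instead of the free-standing isSafeToExpand helper, which is why the expander is created before the safety check. A minimal sketch of that call order against the LLVM API; the surrounding analysis setup is assumed:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

using namespace llvm;

// Expand Start at InsertPt only when the expander itself reports it is
// safe, mirroring the reordering in rewriteLoadStores.
static Value *expandIfSafe(ScalarEvolution &SE, const SCEV *Start, Type *Ty,
                           Instruction *InsertPt) {
  SCEVExpander Expander(SE, InsertPt->getModule()->getDataLayout(),
                        "loopprepare-sketch");
  if (!Expander.isSafeToExpand(Start))
    return nullptr;
  return Expander.expandCodeFor(Start, Ty, InsertPt);
}
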
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 917837a307ad..e6140edc8403 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -30,6 +30,9 @@ class MachineInstr;
class MachineOperand;
class PassRegistry;
+FunctionPass *createRISCVCodeGenPreparePass();
+void initializeRISCVCodeGenPreparePass(PassRegistry &);
+
bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP);
bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index e783ef38b448..8a6f69c7f7ca 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -19,6 +19,19 @@ def HasStdExtM : Predicate<"Subtarget->hasStdExtM()">,
AssemblerPredicate<(all_of FeatureStdExtM),
"'M' (Integer Multiplication and Division)">;
+def FeatureStdExtZmmul
+ : SubtargetFeature<"zmmul", "HasStdExtZmmul", "true",
+ "'Zmmul' (Integer Multiplication)">;
+def HasStdExtZmmul : Predicate<"Subtarget->hasStdExtZmmul()">,
+ AssemblerPredicate<(all_of FeatureStdExtZmmul),
+ "'Zmmul' (Integer Multiplication)">;
+
+def HasStdExtMOrZmmul
+ : Predicate<"Subtarget->hasStdExtM() || Subtarget->hasStdExtZmmul()">,
+ AssemblerPredicate<(any_of FeatureStdExtM, FeatureStdExtZmmul),
+ "'M' (Integer Multiplication and Division) or "
+ "'Zmmul' (Integer Multiplication)">;
+
def FeatureStdExtA
: SubtargetFeature<"a", "HasStdExtA", "true",
"'A' (Atomic Instructions)">;
@@ -465,7 +478,8 @@ def TuneNoDefaultUnroll
"Disable default unroll preference.">;
def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
- "SiFive 7-Series processors">;
+ "SiFive 7-Series processors",
+ [TuneNoDefaultUnroll]>;
//===----------------------------------------------------------------------===//
// Named operands for CSR instructions.
@@ -499,9 +513,9 @@ def : ProcessorModel<"rocket-rv32", RocketModel, []>;
def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>;
def : ProcessorModel<"sifive-7-rv32", SiFive7Model, [],
- [TuneSiFive7, TuneNoDefaultUnroll]>;
+ [TuneSiFive7]>;
def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit],
- [TuneSiFive7, TuneNoDefaultUnroll]>;
+ [TuneSiFive7]>;
def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM,
FeatureStdExtC]>;
@@ -528,7 +542,7 @@ def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
FeatureStdExtC],
- [TuneSiFive7, TuneNoDefaultUnroll]>;
+ [TuneSiFive7]>;
def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit,
FeatureStdExtM,
@@ -553,7 +567,7 @@ def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit,
FeatureStdExtF,
FeatureStdExtD,
FeatureStdExtC],
- [TuneSiFive7, TuneNoDefaultUnroll]>;
+ [TuneSiFive7]>;
def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
FeatureStdExtM,
@@ -568,7 +582,7 @@ def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit,
FeatureStdExtF,
FeatureStdExtD,
FeatureStdExtC],
- [TuneSiFive7, TuneNoDefaultUnroll]>;
+ [TuneSiFive7]>;
//===----------------------------------------------------------------------===//
// Define the RISC-V target.
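
The new Zmmul extension provides only the multiplication half of M: multiply nodes become legal while division and remainder still expand to libcalls (the RISCVISelLowering changes later in this patch split the operation actions accordingly). A small C++ illustration; the libcall name is the usual compiler-rt one and is stated here as an assumption:

#include <cstdint>

// On an rv64i core with only Zmmul, `scaled` stays a single mul
// instruction, while `ratio` is expanded to a __divdi3-style libcall
// because the divide instructions are still unavailable.
int64_t scaled(int64_t a, int64_t b) { return a * b; }
int64_t ratio(int64_t a, int64_t b)  { return a / b; }
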
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
new file mode 100644
index 000000000000..b700a9ede39b
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -0,0 +1,169 @@
+//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a RISCV specific version of CodeGenPrepare.
+// It munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-codegenprepare"
+#define PASS_NAME "RISCV CodeGenPrepare"
+
+STATISTIC(NumZExtToSExt, "Number of ZExt instructions converted to SExt");
+
+namespace {
+
+class RISCVCodeGenPrepare : public FunctionPass {
+ const DataLayout *DL;
+ const RISCVSubtarget *ST;
+
+public:
+ static char ID;
+
+ RISCVCodeGenPrepare() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return PASS_NAME; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetPassConfig>();
+ }
+
+private:
+ bool optimizeZExt(ZExtInst *I);
+ bool optimizeAndExt(BinaryOperator *BO);
+};
+
+} // end anonymous namespace
+
+bool RISCVCodeGenPrepare::optimizeZExt(ZExtInst *ZExt) {
+ if (!ST->is64Bit())
+ return false;
+
+ Value *Src = ZExt->getOperand(0);
+
+ // We only care about ZExt from i32 to i64.
+ if (!ZExt->getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32))
+ return false;
+
+ // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we
+ // can determine that the sign bit of X is zero via a dominating condition.
+ // This often occurs with widened induction variables.
+ if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
+ Constant::getNullValue(Src->getType()), ZExt,
+ *DL)) {
+ auto *SExt = new SExtInst(Src, ZExt->getType(), "", ZExt);
+ SExt->takeName(ZExt);
+ SExt->setDebugLoc(ZExt->getDebugLoc());
+
+ ZExt->replaceAllUsesWith(SExt);
+ ZExt->eraseFromParent();
+ ++NumZExtToSExt;
+ return true;
+ }
+
+ return false;
+}
+
+// Try to optimize (i64 (and (zext/sext (i32 X)), C1)) if C1 has bit 31 set,
+// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill
+// the upper 32 bits with ones. A separate transform will turn (zext X) into
+// (sext X) for the same condition.
+bool RISCVCodeGenPrepare::optimizeAndExt(BinaryOperator *BO) {
+ if (!ST->is64Bit())
+ return false;
+
+ if (BO->getOpcode() != Instruction::And)
+ return false;
+
+ if (!BO->getType()->isIntegerTy(64))
+ return false;
+
+ // Left hand side should be sext or zext.
+ Instruction *LHS = dyn_cast<Instruction>(BO->getOperand(0));
+ if (!LHS || (!isa<SExtInst>(LHS) && !isa<ZExtInst>(LHS)))
+ return false;
+
+ Value *LHSSrc = LHS->getOperand(0);
+ if (!LHSSrc->getType()->isIntegerTy(32))
+ return false;
+
+ // Right hand side should be a constant.
+ Value *RHS = BO->getOperand(1);
+
+ auto *CI = dyn_cast<ConstantInt>(RHS);
+ if (!CI)
+ return false;
+ uint64_t C = CI->getZExtValue();
+
+ // Look for constants that fit in 32 bits but not simm12, and can be made
+ // into simm12 by sign extending bit 31. This will allow use of ANDI.
+  // TODO: Is it worth making simm32?
+ if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
+ return false;
+
+ // If we can determine the sign bit of the input is 0, we can replace the
+ // And mask constant.
+ if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc,
+ Constant::getNullValue(LHSSrc->getType()),
+ LHS, *DL))
+ return false;
+
+ // Sign extend the constant and replace the And operand.
+ C = SignExtend64<32>(C);
+ BO->setOperand(1, ConstantInt::get(LHS->getType(), C));
+
+ return true;
+}
+
+bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<RISCVTargetMachine>();
+ ST = &TM.getSubtarget<RISCVSubtarget>(F);
+
+ DL = &F.getParent()->getDataLayout();
+
+ bool MadeChange = false;
+ for (auto &BB : F) {
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
+ if (auto *ZExt = dyn_cast<ZExtInst>(&I))
+ MadeChange |= optimizeZExt(ZExt);
+ else if (I.getOpcode() == Instruction::And)
+ MadeChange |= optimizeAndExt(cast<BinaryOperator>(&I));
+ }
+ }
+
+ return MadeChange;
+}
+
+INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+
+char RISCVCodeGenPrepare::ID = 0;
+
+FunctionPass *llvm::createRISCVCodeGenPreparePass() {
+ return new RISCVCodeGenPrepare();
+}
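
RISCVCodeGenPrepare::optimizeZExt rewrites (i64 (zext (i32 X))) into a sext when a dominating condition proves X is non-negative, because on RV64 a sign-extended 32-bit value needs no extra instruction. A small C++ source sketch of the kind of code that exposes the pattern; whether a particular clang build produces exactly this zext-after-compare shape is an assumption:

#include <cstdint>

// The loop guard keeps i non-negative in the body, so the zero-extension
// of the 32-bit index used for addressing can be turned into a sign
// extension, which is free on RV64.
int64_t sum_prefix(const int32_t *a, int32_t n) {
  int64_t s = 0;
  for (int32_t i = 0; i < n; ++i)
    s += a[static_cast<uint32_t>(i)]; // index is zero-extended to 64 bits
  return s;
}
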
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5b823af1e9b8..d5826b46d738 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -690,6 +690,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// 32 trailing ones should use srliw via tablegen pattern.
if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
break;
+ // If C2 is (1 << ShAmt) use bexti if possible.
+ if (Subtarget->hasStdExtZbs() && ShAmt + 1 == TrailingOnes) {
+ SDNode *BEXTI =
+ CurDAG->getMachineNode(RISCV::BEXTI, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(ShAmt, DL, VT));
+ ReplaceNode(Node, BEXTI);
+ return;
+ }
unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
@@ -939,18 +947,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!isMask_64(C2))
break;
- // This should be the only use of the AND unless we will use
- // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
- // constants.
- if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
- break;
-
- // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
- // optimization.
- if (isInt<12>(C2) ||
+ // If this can be an ANDI, ZEXT.H or ZEXT.W, don't do this if the ANDI/ZEXT
+ // has multiple users or the constant is a simm12. This prevents inserting
+    // a shift while the AND/ZEXT still has other uses. Shifting a simm12 will
+ // likely make it more costly to materialize. Otherwise, using a SLLI
+ // might allow it to be compressed.
+ bool IsANDIOrZExt =
+ isInt<12>(C2) ||
(C2 == UINT64_C(0xFFFF) &&
(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
- (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
+ (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba());
+ if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
break;
// We need to shift left the AND input and C1 by a total of XLen bits.
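
The new BEXTI path applies when the (srl (and X, mask), ShAmt) pair isolates exactly one bit, i.e. the mask has ShAmt + 1 trailing ones. A small C++ example of a single-bit test with that shape; the -march spelling is an assumption:

#include <cstdint>

// (x & 0x1FFF) >> 12 keeps exactly bit 12, so with Zbs enabled
// (e.g. -march=rv64gc_zbs) this can now select a single bexti x, 12
// instead of a shift-left/shift-right pair.
uint64_t bit12(uint64_t x) {
  return (x & 0x1FFF) >> 12;
}
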
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 658865703079..1702546b58a6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -215,21 +215,26 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::MULO_I64, nullptr);
}
- if (!Subtarget.hasStdExtM()) {
- setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::SDIV, ISD::UDIV,
- ISD::SREM, ISD::UREM},
- XLenVT, Expand);
+ if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
+ setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
} else {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
-
- setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
- {MVT::i8, MVT::i16, MVT::i32}, Custom);
} else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
}
}
+ if (!Subtarget.hasStdExtM()) {
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
+ XLenVT, Expand);
+ } else {
+ if (Subtarget.is64Bit()) {
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+ {MVT::i8, MVT::i16, MVT::i32}, Custom);
+ }
+ }
+
setOperationAction(
{ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
Expand);
@@ -294,7 +299,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
- static constexpr ISD::NodeType FPLegalNodeTypes[] = {
+ static const unsigned FPLegalNodeTypes[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
@@ -307,7 +312,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
- static const ISD::NodeType FPOpToExpand[] = {
+ static const unsigned FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
@@ -315,8 +320,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
if (Subtarget.hasStdExtZfh()) {
- for (auto NT : FPLegalNodeTypes)
- setOperationAction(NT, MVT::f16, Legal);
+ setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
@@ -340,14 +344,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtF()) {
- for (auto NT : FPLegalNodeTypes)
- setOperationAction(NT, MVT::f32, Legal);
+ setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- for (auto Op : FPOpToExpand)
- setOperationAction(Op, MVT::f32, Expand);
+ setOperationAction(FPOpToExpand, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
@@ -356,8 +358,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
if (Subtarget.hasStdExtD()) {
- for (auto NT : FPLegalNodeTypes)
- setOperationAction(NT, MVT::f64, Legal);
+ setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
@@ -366,8 +367,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
- for (auto Op : FPOpToExpand)
- setOperationAction(Op, MVT::f64, Expand);
+ setOperationAction(FPOpToExpand, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
@@ -458,17 +458,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SETCC, ISD::VP_FP_ROUND,
ISD::VP_FP_EXTEND};
+ static const unsigned IntegerVecReduceOps[] = {
+ ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
+ ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
+
+ static const unsigned FloatingPointVecReduceOps[] = {
+ ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
+ ISD::VECREDUCE_FMAX};
+
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
MVT::i64, Custom);
- setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
- ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
- ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
- ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
- MVT::i64, Custom);
+ setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
@@ -581,11 +586,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
- setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
- ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
- ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
- ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
- VT, Custom);
+ setOperationAction(IntegerVecReduceOps, VT, Custom);
setOperationAction(IntegerVPOps, VT, Custom);
@@ -661,9 +662,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
VT, Custom);
- setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
- ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
- VT, Custom);
+ setOperationAction(FloatingPointVecReduceOps, VT, Custom);
// Expand FP operations that need libcalls.
setOperationAction(ISD::FREM, VT, Expand);
@@ -905,17 +904,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
VT, Custom);
- for (auto CC : VFPCCToExpand)
- setCondCodeAction(CC, VT, Expand);
+ setCondCodeAction(VFPCCToExpand, VT, Expand);
setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BITCAST, VT, Custom);
- setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
- ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
- VT, Custom);
+ setOperationAction(FloatingPointVecReduceOps, VT, Custom);
setOperationAction(FloatingPointVPOps, VT, Custom);
}
@@ -943,7 +939,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setJumpIsExpensive();
setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
- ISD::OR, ISD::XOR});
+ ISD::OR, ISD::XOR, ISD::SETCC});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
@@ -1374,6 +1370,23 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
ISD::CondCode &CC, SelectionDAG &DAG) {
+ // If this is a single bit test that can't be handled by ANDI, shift the
+ // bit to be tested to the MSB and perform a signed compare with 0.
+ if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
+ LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
+ isa<ConstantSDNode>(LHS.getOperand(1))) {
+ uint64_t Mask = LHS.getConstantOperandVal(1);
+ if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) {
+ CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
+ unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
+ LHS = LHS.getOperand(0);
+ if (ShAmt != 0)
+ LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
+ DAG.getConstant(ShAmt, DL, LHS.getValueType()));
+ return;
+ }
+ }
+
// Convert X > -1 to X >= 0.
if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
RHS = DAG.getConstant(0, DL, RHS.getValueType());
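
As a quick aside on the single-bit-test rewrite added above: for a power-of-two mask whose value does not fit in a simm12, testing the bit against zero is the same as shifting that bit into the sign position and doing a signed compare with zero. A standalone C++ sketch of that identity (illustrative only, no LLVM APIs; the loop bounds and test values are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned XLen = 64;
      for (unsigned Bit = 11; Bit < XLen; ++Bit) {  // masks too large for ANDI
        uint64_t Mask = UINT64_C(1) << Bit;
        const uint64_t Xs[] = {0, Mask, ~Mask, UINT64_C(0x123456789abcdef0)};
        for (uint64_t X : Xs) {
          bool SetEQ = (X & Mask) == 0;             // original (seteq ... 0) form
          unsigned ShAmt = XLen - 1 - Bit;
          bool SetGE = (int64_t)(X << ShAmt) >= 0;  // rewritten (setge ... 0) form
          assert(SetEQ == SetGE);
        }
      }
      return 0;
    }
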
@@ -3707,10 +3720,7 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SDLoc DL(Op);
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
assert(N->getOffset() == 0 && "unexpected offset in global node");
-
- const GlobalValue *GV = N->getGlobal();
- bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
- return getAddr(N, DAG, IsLocal);
+ return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@@ -8130,6 +8140,50 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
+// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
+// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
+// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
+// can become a sext.w instead of a shift pair.
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ if (OpVT != MVT::i64 || !Subtarget.is64Bit())
+ return SDValue();
+
+ // RHS needs to be a constant.
+ auto *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (!N1C)
+ return SDValue();
+
+ // LHS needs to be (and X, 0xffffffff).
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N0.getOperand(1)) ||
+ N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
+ return SDValue();
+
+ // Looking for an equality compare.
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (!isIntEqualitySetCC(Cond))
+ return SDValue();
+
+ const APInt &C1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+
+ SDLoc dl(N);
+ // If the constant is larger than 2^32 - 1 it is impossible for both sides
+ // to be equal.
+ if (C1.getActiveBits() > 32)
+ return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
+
+ SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
+ N0.getOperand(0), DAG.getValueType(MVT::i32));
+ return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
+ dl, OpVT), Cond);
+}
+
static SDValue
performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
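
The comment above states the reasoning; as a standalone check (illustrative C++ only, not part of the patch, arbitrary test values): when C1 has at most 32 active bits, comparing (X & 0xffffffff) with C1 is equivalent to comparing the 32-bit sign extensions of X and C1, which is what lets the AND become a sext_inreg with a re-sign-extended constant.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Xs[] = {0, 1, 0x7fffffffu, 0x80000000u, 0xffffffffu,
                             UINT64_C(0x1ffffffff), UINT64_C(0xdeadbeefcafef00d)};
      const uint64_t C1s[] = {0, 1, 0x7fffffffu, 0x80000000u, 0xffffffffu};
      for (uint64_t X : Xs)
        for (uint64_t C1 : C1s) {
          bool Original  = (X & UINT64_C(0xffffffff)) == C1;
          bool Rewritten = (int64_t)(int32_t)X == (int64_t)(int32_t)C1;
          assert(Original == Rewritten);
        }
      return 0;
    }
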
@@ -8658,6 +8712,75 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(32 - ShAmt, DL, MVT::i64));
}
+// Perform common combines for BR_CC and SELECT_CC conditions.
+static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
+ SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
+ if (!ISD::isIntEqualitySetCC(CCVal))
+ return false;
+
+ // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
+ // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
+ if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
+ LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
+ // If we're looking for eq 0 instead of ne 0, we need to invert the
+ // condition.
+ bool Invert = CCVal == ISD::SETEQ;
+ CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ if (Invert)
+ CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
+
+ RHS = LHS.getOperand(1);
+ LHS = LHS.getOperand(0);
+ translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
+
+ CC = DAG.getCondCode(CCVal);
+ return true;
+ }
+
+ // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
+ if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
+ RHS = LHS.getOperand(1);
+ LHS = LHS.getOperand(0);
+ return true;
+ }
+
+ // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
+ if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
+ LHS.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue LHS0 = LHS.getOperand(0);
+ if (LHS0.getOpcode() == ISD::AND &&
+ LHS0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t Mask = LHS0.getConstantOperandVal(1);
+ uint64_t ShAmt = LHS.getConstantOperandVal(1);
+ if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
+ CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
+ CC = DAG.getCondCode(CCVal);
+
+ ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
+ LHS = LHS0.getOperand(0);
+ if (ShAmt != 0)
+ LHS =
+ DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
+ DAG.getConstant(ShAmt, DL, LHS.getValueType()));
+ return true;
+ }
+ }
+ }
+
+ // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
+ // This can occur when legalizing some floating point comparisons.
+ APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
+ if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
+ CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
+ CC = DAG.getCondCode(CCVal);
+ RHS = DAG.getConstant(0, DL, LHS.getValueType());
+ return true;
+ }
+
+ return false;
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
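
Two of the combine_CC folds rest on simple identities that can be checked in isolation (illustrative C++ only, not part of the patch): xor against zero is just equality, and once X is known to be 0 or 1, the setne-1 form is the inverted seteq-0 form. The srl/and bit-test fold uses the same shift-to-sign-bit identity sketched after translateSetCCForBranch above.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Vals[] = {0, 1, 42, ~UINT64_C(0)};
      for (uint64_t X : Vals)
        for (uint64_t Y : Vals)
          assert(((X ^ Y) == 0) == (X == Y));  // (xor X, Y) == 0  <=>  X == Y

      for (uint64_t X = 0; X <= 1; ++X)        // MaskedValueIsZero above bit 0
        assert((X != 1) == (X == 0));          // (X, 1, setne) <=> (X, 0, seteq)
      return 0;
    }
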
@@ -8872,6 +8995,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FMAXNUM:
case ISD::FMINNUM:
return combineBinOpToReduce(N, DAG);
+ case ISD::SETCC:
+ return performSETCCCombine(N, DAG, Subtarget);
case ISD::SIGN_EXTEND_INREG:
return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::ZERO_EXTEND:
@@ -8900,110 +9025,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Transform
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
SDValue TrueV = N->getOperand(3);
SDValue FalseV = N->getOperand(4);
+ SDLoc DL(N);
// If the True and False values are the same, we don't need a select_cc.
if (TrueV == FalseV)
return TrueV;
- ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
- if (!ISD::isIntEqualitySetCC(CCVal))
- break;
-
- // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
- // (select_cc X, Y, lt, trueV, falseV)
- // Sometimes the setcc is introduced after select_cc has been formed.
- if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
- LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
- // If we're looking for eq 0 instead of ne 0, we need to invert the
- // condition.
- bool Invert = CCVal == ISD::SETEQ;
- CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
- if (Invert)
- CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
-
- SDLoc DL(N);
- RHS = LHS.getOperand(1);
- LHS = LHS.getOperand(0);
- translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
-
- SDValue TargetCC = DAG.getCondCode(CCVal);
- return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
- {LHS, RHS, TargetCC, TrueV, FalseV});
- }
-
- // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
- // (select_cc X, Y, eq/ne, trueV, falseV)
- if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
- return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
- {LHS.getOperand(0), LHS.getOperand(1),
- N->getOperand(2), TrueV, FalseV});
- // (select_cc X, 1, setne, trueV, falseV) ->
- // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
- // This can occur when legalizing some floating point comparisons.
- APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
- if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
- SDLoc DL(N);
- CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
- SDValue TargetCC = DAG.getCondCode(CCVal);
- RHS = DAG.getConstant(0, DL, LHS.getValueType());
+ if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
- {LHS, RHS, TargetCC, TrueV, FalseV});
- }
+ {LHS, RHS, CC, TrueV, FalseV});
- break;
+ return SDValue();
}
case RISCVISD::BR_CC: {
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
- ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
- if (!ISD::isIntEqualitySetCC(CCVal))
- break;
-
- // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
- // (br_cc X, Y, lt, dest)
- // Sometimes the setcc is introduced after br_cc has been formed.
- if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
- LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
- // If we're looking for eq 0 instead of ne 0, we need to invert the
- // condition.
- bool Invert = CCVal == ISD::SETEQ;
- CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
- if (Invert)
- CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
-
- SDLoc DL(N);
- RHS = LHS.getOperand(1);
- LHS = LHS.getOperand(0);
- translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
+ SDValue CC = N->getOperand(3);
+ SDLoc DL(N);
+ if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
- N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
- N->getOperand(4));
- }
-
- // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
- // (br_cc X, Y, eq/ne, trueV, falseV)
- if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
- return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
- N->getOperand(3), N->getOperand(4));
-
- // (br_cc X, 1, setne, br_cc) ->
- // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
- // This can occur when legalizing some floating point comparisons.
- APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
- if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
- SDLoc DL(N);
- CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
- SDValue TargetCC = DAG.getCondCode(CCVal);
- RHS = DAG.getConstant(0, DL, LHS.getValueType());
- return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
- N->getOperand(0), LHS, RHS, TargetCC,
- N->getOperand(4));
- }
- break;
+ N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
+
+ return SDValue();
}
case ISD::BITREVERSE:
return performBITREVERSECombine(N, DAG, Subtarget);
@@ -9299,6 +9346,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
bool RISCVTargetLowering::isDesirableToCommuteWithShift(
const SDNode *N, CombineLevel Level) const {
+ assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) &&
+ "Expected shift op");
+
// The following folds are only desirable if `(OP _, c1 << c2)` can be
// materialised in fewer instructions than `(OP _, c1)`:
//
@@ -9357,7 +9408,8 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
return false;
// Only handle AND for now.
- if (Op.getOpcode() != ISD::AND)
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -9376,12 +9428,13 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
};
- auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
+ auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
if (NewMask == Mask)
return true;
SDLoc DL(Op);
- SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
- SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+ SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
+ Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
};
@@ -9390,18 +9443,21 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
if (ShrunkMask.isSignedIntN(12))
return false;
- // Preserve (and X, 0xffff) when zext.h is supported.
- if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
- APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
- if (IsLegalMask(NewMask))
- return UseMask(NewMask);
- }
+ // And has a few special cases for zext.
+ if (Opcode == ISD::AND) {
+ // Preserve (and X, 0xffff) when zext.h is supported.
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
+ APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
+ if (IsLegalMask(NewMask))
+ return UseMask(NewMask);
+ }
- // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
- if (VT == MVT::i64) {
- APInt NewMask = APInt(64, 0xffffffff);
- if (IsLegalMask(NewMask))
- return UseMask(NewMask);
+ // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
+ if (VT == MVT::i64) {
+ APInt NewMask = APInt(64, 0xffffffff);
+ if (IsLegalMask(NewMask))
+ return UseMask(NewMask);
+ }
}
// For the remaining optimizations, we need to be able to make a negative
@@ -9414,10 +9470,11 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
// Try to make a 12 bit negative immediate. If that fails try to make a 32
// bit negative immediate unless the shrunk immediate already fits in 32 bits.
+ // If we can't create a simm12, we shouldn't change opaque constants.
APInt NewMask = ShrunkMask;
if (MinSignedBits <= 12)
NewMask.setBitsFrom(11);
- else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
+ else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
NewMask.setBitsFrom(31);
else
return false;
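
A small standalone check of the arithmetic behind the simm12 path above (illustrative C++ only, not part of the patch; 0x880 is an arbitrary example mask): setting bits [11, 63] keeps every bit the shrunk mask keeps while producing a constant that sign-extends from bit 11, i.e. a valid 12-bit signed immediate.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t ShrunkMask = 0x880;                      // example mask, not a simm12
      uint64_t NewMask = ShrunkMask | ~UINT64_C(0x7ff); // NewMask.setBitsFrom(11)
      // Still a superset of the shrunk mask, so the demanded bits are preserved.
      assert((ShrunkMask & NewMask) == ShrunkMask);
      // The widened constant fits the simm12 range [-2048, 2047].
      int64_t Imm = (int64_t)NewMask;
      assert(Imm >= -2048 && Imm <= 2047);
      return 0;
    }
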
@@ -10015,15 +10072,15 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
LastSelectPseudo = &*SequenceMBBI;
SequenceMBBI->collectDebugValues(SelectDebugValues);
SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
- } else {
- if (SequenceMBBI->hasUnmodeledSideEffects() ||
- SequenceMBBI->mayLoadOrStore())
- break;
- if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
- return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
- }))
- break;
+ continue;
}
+ if (SequenceMBBI->hasUnmodeledSideEffects() ||
+ SequenceMBBI->mayLoadOrStore())
+ break;
+ if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
+ return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
+ }))
+ break;
}
const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -12159,7 +12216,8 @@ bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
// FIXME: This doesn't work for zve32, but that's already broken
// elsewhere for the same reason.
assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
- assert(RISCV::RVVBitsPerBlock == 64 && "RVVBitsPerBlock changed, audit needed");
+ static_assert(RISCV::RVVBitsPerBlock == 64,
+ "RVVBitsPerBlock changed, audit needed");
return true;
}
@@ -12214,10 +12272,12 @@ bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned)
bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const {
// Check integral scalar types.
+ const bool HasExtMOrZmmul =
+ Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
if (VT.isScalarInteger()) {
// Omit the optimization if the sub target has the M extension and the data
// size exceeds XLen.
- if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
+ if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
return false;
if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
// Break the MUL to a SLLI and an ADD/SUB.
@@ -12232,7 +12292,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return true;
// Omit the following optimization if the sub target has the M extension
// and the data size >= XLen.
- if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
+ if (HasExtMOrZmmul && VT.getSizeInBits() >= Subtarget.getXLen())
return false;
// Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
// a pair of LUI/ADDI.
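
The decomposition the comments above refer to is plain shift-and-add arithmetic; a standalone sketch (illustrative C++ only, not part of the patch, arbitrary test values):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Xs[] = {3, 1000, UINT64_C(0x123456789)};
      for (unsigned N = 1; N < 12; ++N)
        for (uint64_t X : Xs) {
          assert(X * ((UINT64_C(1) << N) + 1) == (X << N) + X);  // SLLI + ADD
          assert(X * ((UINT64_C(1) << N) - 1) == (X << N) - X);  // SLLI + SUB
        }
      return 0;
    }
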
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 685604ad9a59..75a79895330f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -637,6 +637,64 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
}
}
+MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
+ MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // The below optimizations narrow the load so they are only valid for little
+ // endian.
+ // TODO: Support big endian by adding an offset into the frame object?
+ if (MF.getDataLayout().isBigEndian())
+ return nullptr;
+
+ // Fold load from stack followed by sext.w into lw.
+ // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w?
+ if (Ops.size() != 1 || Ops[0] != 1)
+ return nullptr;
+
+ unsigned LoadOpc;
+ switch (MI.getOpcode()) {
+ default:
+ if (RISCV::isSEXT_W(MI)) {
+ LoadOpc = RISCV::LW;
+ break;
+ }
+ if (RISCV::isZEXT_W(MI)) {
+ LoadOpc = RISCV::LWU;
+ break;
+ }
+ if (RISCV::isZEXT_B(MI)) {
+ LoadOpc = RISCV::LBU;
+ break;
+ }
+ return nullptr;
+ case RISCV::SEXT_H:
+ LoadOpc = RISCV::LH;
+ break;
+ case RISCV::SEXT_B:
+ LoadOpc = RISCV::LB;
+ break;
+ case RISCV::ZEXT_H_RV32:
+ case RISCV::ZEXT_H_RV64:
+ LoadOpc = RISCV::LHU;
+ break;
+ }
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIndex),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlign(FrameIndex));
+
+ Register DstReg = MI.getOperand(0).getReg();
+ return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
+ DstReg)
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
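
The equivalence the new foldMemoryOperandImpl relies on can be sketched on the host (illustrative C++ only, not part of the patch; the slot value is arbitrary): on a little-endian machine, reloading the full 64-bit slot and sign-extending its low 32 bits gives the same result as a 32-bit signed load of the slot, which is also why big-endian targets are rejected above.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint64_t Slot = UINT64_C(0x11223344aabbccdd); // value stored in the slot
      int64_t ViaSextW = (int64_t)(int32_t)Slot;    // LD followed by sext.w

      int32_t Narrow;
      std::memcpy(&Narrow, &Slot, sizeof(Narrow));  // LW of the low-addressed bytes
      int64_t ViaLW = Narrow;

      assert(ViaSextW == ViaLW);  // both 0xffffffffaabbccdd on little endian
      return 0;
    }
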
@@ -1799,17 +1857,30 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(VL, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
- } else if ((NumOfVReg == 3 || NumOfVReg == 5 || NumOfVReg == 9) &&
- STI.hasStdExtZba()) {
- // We can use Zba SHXADD instructions for multiply in some cases.
- // TODO: Generalize to SHXADD+SLLI.
+ } else if (STI.hasStdExtZba() &&
+ ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) ||
+ (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) ||
+ (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) {
+ // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
unsigned Opc;
- switch (NumOfVReg) {
- default: llvm_unreachable("Unexpected number of vregs");
- case 3: Opc = RISCV::SH1ADD; break;
- case 5: Opc = RISCV::SH2ADD; break;
- case 9: Opc = RISCV::SH3ADD; break;
+ uint32_t ShiftAmount;
+ if (NumOfVReg % 9 == 0) {
+ Opc = RISCV::SH3ADD;
+ ShiftAmount = Log2_64(NumOfVReg / 9);
+ } else if (NumOfVReg % 5 == 0) {
+ Opc = RISCV::SH2ADD;
+ ShiftAmount = Log2_64(NumOfVReg / 5);
+ } else if (NumOfVReg % 3 == 0) {
+ Opc = RISCV::SH1ADD;
+ ShiftAmount = Log2_64(NumOfVReg / 3);
+ } else {
+ llvm_unreachable("Unexpected number of vregs");
}
+ if (ShiftAmount)
+ BuildMI(MBB, II, DL, get(RISCV::SLLI), VL)
+ .addReg(VL, RegState::Kill)
+ .addImm(ShiftAmount)
+ .setMIFlag(Flag);
BuildMI(MBB, II, DL, get(Opc), VL)
.addReg(VL, RegState::Kill)
.addReg(VL)
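
The generalized SHXADD+SLLI sequence above computes VL * (F << K) for F in {3, 5, 9} by shifting first and then applying SHxADD; a standalone check of that factoring (illustrative C++ only, not part of the patch, arbitrary test values):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned Factors[] = {3, 5, 9};
      const unsigned ShXs[]    = {1, 2, 3};        // SH1ADD, SH2ADD, SH3ADD
      const uint64_t VLs[]     = {1, 7, 12345};
      for (unsigned I = 0; I < 3; ++I)
        for (unsigned K = 0; K < 8; ++K)
          for (uint64_t VL : VLs) {
            uint64_t NumOfVReg = (uint64_t)Factors[I] << K;
            uint64_t T = VL << K;                  // SLLI VL, K
            uint64_t Result = (T << ShXs[I]) + T;  // SHxADD T, T
            assert(Result == VL * NumOfVReg);
          }
      return 0;
    }
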
@@ -1839,10 +1910,11 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
} else {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
movImm(MBB, II, DL, N, NumOfVReg, Flag);
- if (!STI.hasStdExtM())
+ if (!STI.hasStdExtM() && !STI.hasStdExtZmmul())
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),
- "M-extension must be enabled to calculate the vscaled size/offset."});
+ "M- or Zmmul-extension must be enabled to calculate the vscaled size/"
+ "offset."});
BuildMI(MBB, II, DL, get(RISCV::MUL), VL)
.addReg(VL, RegState::Kill)
.addReg(N, RegState::Kill)
@@ -1852,6 +1924,24 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
return VL;
}
+// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
+bool RISCV::isSEXT_W(const MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
+}
+
+// Returns true if this is the zext.w pattern, adduw rd, rs1, x0.
+bool RISCV::isZEXT_W(const MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
+}
+
+// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
+bool RISCV::isZEXT_B(const MachineInstr &MI) {
+ return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
+}
+
static bool isRVVWholeLoadStore(unsigned Opcode) {
switch (Opcode) {
default:
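
What the three recognizers above accept is, in value terms, plain sign- and zero-extension; a standalone sketch (illustrative C++ only, not part of the patch; the input value is arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = UINT64_C(0xfedcba9886543210);
      uint64_t SextW = (uint64_t)(int64_t)(int32_t)X; // addiw rd, rs1, 0
      uint64_t ZextW = X & UINT64_C(0xffffffff);      // add.uw rd, rs1, x0
      uint64_t ZextB = X & UINT64_C(0xff);            // andi rd, rs1, 255
      assert(SextW == UINT64_C(0xffffffff86543210));
      assert(ZextW == UINT64_C(0x86543210));
      assert(ZextB == UINT64_C(0x10));
      return 0;
    }
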
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 5368437618bd..4aa9ded5b3a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -69,6 +69,14 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ using TargetInstrInfo::foldMemoryOperandImpl;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineBasicBlock::iterator InsertPt,
+ int FrameIndex,
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
+
// Materializes the given integer Val into DstReg.
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
@@ -183,6 +191,11 @@ protected:
namespace RISCV {
+// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
+bool isSEXT_W(const MachineInstr &MI);
+bool isZEXT_W(const MachineInstr &MI);
+bool isZEXT_B(const MachineInstr &MI);
+
// Returns true if the given MI is an RVV instruction opcode for which we may
// expect to see a FrameIndex operand.
bool isRVVSpill(const MachineInstr &MI);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 06a90438838e..78fd09fbf387 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1278,6 +1278,13 @@ def : Pat<(setgt GPR:$rs1, simm12_minus1_nonzero:$imm),
def : Pat<(setugt GPR:$rs1, simm12_minus1_nonzero:$imm),
(XORI (SLTIU GPR:$rs1, (ImmPlus1 simm12_minus1_nonzero:$imm)), 1)>;
+// If negating a pattern that requires an XORI above, we can fold the XORI with
+// the NEG. The XORI is equivalent to 1-X and negating gives X-1.
+def : Pat<(ineg (setuge GPR:$rs1, GPR:$rs2)), (ADDI (SLTU GPR:$rs1, GPR:$rs2), -1)>;
+def : Pat<(ineg (setule GPR:$rs1, GPR:$rs2)), (ADDI (SLTU GPR:$rs2, GPR:$rs1), -1)>;
+def : Pat<(ineg (setge GPR:$rs1, GPR:$rs2)), (ADDI (SLT GPR:$rs1, GPR:$rs2), -1)>;
+def : Pat<(ineg (setle GPR:$rs1, GPR:$rs2)), (ADDI (SLT GPR:$rs2, GPR:$rs1), -1)>;
+
def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
RISCVCC::CondCode BrCC = getRISCVCCForIntCC(CC);
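
The "1-X versus X-1" reasoning in the comment above can be checked directly for the setuge case (illustrative C++ only, not part of the patch, arbitrary test values): negating the XORI(SLTU, 1) form gives the same value as ADDI(SLTU, -1).

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Vals[] = {0, 1, 5, UINT64_C(0x8000000000000000), ~UINT64_C(0)};
      for (uint64_t A : Vals)
        for (uint64_t B : Vals) {
          int64_t Neg  = -(int64_t)(A >= B ? 1 : 0);   // ineg (setuge A, B)
          int64_t Fold = (int64_t)(A < B ? 1 : 0) - 1; // ADDI (SLTU A, B), -1
          assert(Neg == Fold);
        }
      return 0;
    }
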
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 72ba8460116f..662604b138d2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -24,7 +24,7 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDT_RISCVIntBinOpW>;
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtM] in {
+let Predicates = [HasStdExtMOrZmmul] in {
def MUL : ALU_rr<0b0000001, 0b000, "mul", /*Commutable*/1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
def MULH : ALU_rr<0b0000001, 0b001, "mulh", /*Commutable*/1>,
@@ -33,6 +33,9 @@ def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
def MULHU : ALU_rr<0b0000001, 0b011, "mulhu", /*Commutable*/1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+} // Predicates = [HasStdExtMOrZmmul]
+
+let Predicates = [HasStdExtM] in {
def DIV : ALU_rr<0b0000001, 0b100, "div">,
Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
def DIVU : ALU_rr<0b0000001, 0b101, "divu">,
@@ -43,9 +46,12 @@ def REMU : ALU_rr<0b0000001, 0b111, "remu">,
Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
} // Predicates = [HasStdExtM]
-let Predicates = [HasStdExtM, IsRV64] in {
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in {
def MULW : ALUW_rr<0b0000001, 0b000, "mulw", /*Commutable*/1>,
Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>;
+} // Predicates = [HasStdExtMOrZmmul, IsRV64]
+
+let Predicates = [HasStdExtM, IsRV64] in {
def DIVW : ALUW_rr<0b0000001, 0b100, "divw">,
Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">,
@@ -60,21 +66,25 @@ def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">,
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtM] in {
+let Predicates = [HasStdExtMOrZmmul] in {
def : PatGprGpr<mul, MUL>;
def : PatGprGpr<mulhs, MULH>;
def : PatGprGpr<mulhu, MULHU>;
def : PatGprGpr<riscv_mulhsu, MULHSU>;
+} // Predicates = [HasStdExtMOrZmmul]
+
+let Predicates = [HasStdExtM] in {
def : PatGprGpr<sdiv, DIV>;
def : PatGprGpr<udiv, DIVU>;
def : PatGprGpr<srem, REM>;
def : PatGprGpr<urem, REMU>;
} // Predicates = [HasStdExtM]
-let Predicates = [HasStdExtM, IsRV64] in {
// Select W instructions if only the lower 32-bits of the result are used.
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in
def : PatGprGpr<binop_allwusers<mul>, MULW>;
+let Predicates = [HasStdExtM, IsRV64] in {
def : PatGprGpr<riscv_divw, DIVW>;
def : PatGprGpr<riscv_divuw, DIVUW>;
def : PatGprGpr<riscv_remuw, REMUW>;
@@ -96,11 +106,11 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
(REMW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtM, IsRV64]
-let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
+let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in {
// Special case for calculating the full 64-bit product of a 32x32 unsigned
// multiply where the inputs aren't known to be zero extended. We can shift the
// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
// zeroing the upper 32 bits.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
-} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba]
+} // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba]
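
The MULHU trick in the pattern above follows from the 128-bit product being exactly (a * b) << 64 once both inputs are shifted left by 32; a standalone check (illustrative C++ only, not part of the patch, uses the GCC/Clang __int128 extension, arbitrary test values):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Vals[] = {0, 1, UINT64_C(0x7fffffff), UINT64_C(0xffffffff),
                               UINT64_C(0x123456789abcdef0)}; // upper bits ignored
      for (uint64_t A : Vals)
        for (uint64_t B : Vals) {
          uint64_t Expected =
              (A & UINT64_C(0xffffffff)) * (B & UINT64_C(0xffffffff));
          unsigned __int128 Prod =
              (unsigned __int128)(A << 32) * (unsigned __int128)(B << 32);
          assert((uint64_t)(Prod >> 64) == Expected);         // MULHU result
        }
      return 0;
    }
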
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
index dadf8f81a2c0..920729e9ebbf 100644
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -443,8 +443,7 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = &*I++;
// We're looking for the sext.w pattern ADDIW rd, rs1, 0.
- if (MI->getOpcode() != RISCV::ADDIW || !MI->getOperand(2).isImm() ||
- MI->getOperand(2).getImm() != 0 || !MI->getOperand(1).isReg())
+ if (!RISCV::isSEXT_W(*MI))
continue;
// Input should be a virtual register.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 7589b44b81d3..0446edefa979 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -202,11 +202,9 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
}
bool RISCVSubtarget::enableSubRegLiveness() const {
- if (EnableSubRegLiveness.getNumOccurrences())
- return EnableSubRegLiveness;
- // Enable subregister liveness for RVV to better handle LMUL>1 and segment
- // load/store.
- return hasVInstructions();
+ // FIXME: Enable subregister liveness by default for RVV to better handle
+ // LMUL>1 and segment load/store.
+ return EnableSubRegLiveness;
}
void RISCVSubtarget::getPostRAMutations(
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 831f7fadaa62..6eb949fa551c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -89,6 +89,7 @@ private:
bool HasStdExtZicbom = false;
bool HasStdExtZicboz = false;
bool HasStdExtZicbop = false;
+ bool HasStdExtZmmul = false;
bool HasRV64 = false;
bool IsRV32E = false;
bool EnableLinkerRelax = false;
@@ -184,6 +185,7 @@ public:
bool hasStdExtZicbom() const { return HasStdExtZicbom; }
bool hasStdExtZicboz() const { return HasStdExtZicboz; }
bool hasStdExtZicbop() const { return HasStdExtZicbop; }
+ bool hasStdExtZmmul() const { return HasStdExtZmmul; }
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
bool enableLinkerRelax() const { return EnableLinkerRelax; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b2707b753e87..50fcb00e6c63 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -49,6 +49,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeGlobalISel(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
+ initializeRISCVCodeGenPreparePass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVSExtWRemovalPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
@@ -187,7 +188,11 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
- addPass(createRISCVGatherScatterLoweringPass());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createRISCVGatherScatterLoweringPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createRISCVCodeGenPreparePass());
TargetPassConfig::addIRPasses();
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 29d3c5e491de..f9cd5ffb512b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -65,7 +65,7 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
return TTI::TCC_Free;
// zext.w
- if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZbb())
+ if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
return TTI::TCC_Free;
LLVM_FALLTHROUGH;
case Instruction::Add:
@@ -198,6 +198,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// vid.v v9
// vrsub.vx v10, v9, a0
// vrgather.vv v9, v8, v10
+ if (Tp->getElementType()->isIntegerTy(1))
+ // Mask operations additionally require an extend and a truncate.
+ return LT.first * 9;
return LT.first * 6;
}
}
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
index 1a3e35a5f901..220fd76305aa 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp
@@ -1068,5 +1068,15 @@ StringRef getKernelProfilingInfoName(KernelProfilingInfo e) {
}
llvm_unreachable("Unexpected operand");
}
+
+std::string getExtInstSetName(InstructionSet e) {
+ switch (e) {
+ CASE(InstructionSet, OpenCL_std)
+ CASE(InstructionSet, GLSL_std_450)
+ CASE(InstructionSet, SPV_AMD_shader_trinary_minmax)
+ break;
+ }
+ llvm_unreachable("Unexpected operand");
+}
} // namespace SPIRV
} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
index 2aa9f076c78e..9482723993a2 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h
@@ -706,6 +706,19 @@ enum class KernelProfilingInfo : uint32_t {
CmdExecTime = 0x1,
};
StringRef getKernelProfilingInfoName(KernelProfilingInfo e);
+
+enum class InstructionSet : uint32_t {
+ OpenCL_std = 0,
+ GLSL_std_450 = 1,
+ SPV_AMD_shader_trinary_minmax = 2,
+};
+std::string getExtInstSetName(InstructionSet e);
+
+// TODO: implement other mnemonics.
+enum class Opcode : uint32_t {
+ InBoundsPtrAccessChain = 70,
+ PtrCastToGeneric = 121,
+};
} // namespace SPIRV
} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
index 3105baa02c90..d60e61f36270 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp
@@ -59,7 +59,7 @@ void SPIRVInstPrinter::printOpConstantVarOps(const MCInst *MI,
}
void SPIRVInstPrinter::recordOpExtInstImport(const MCInst *MI) {
- llvm_unreachable("Unimplemented recordOpExtInstImport");
+ // TODO: insert {Reg, Set} into ExtInstSetIDs map.
}
void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -176,7 +176,18 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
}
void SPIRVInstPrinter::printOpExtInst(const MCInst *MI, raw_ostream &O) {
- llvm_unreachable("Unimplemented printOpExtInst");
+ // The fixed operands have already been printed, so we just need to decide
+ // what type of ExtInst operands to print based on the instruction set and number.
+ MCInstrDesc MCDesc = MII.get(MI->getOpcode());
+ unsigned NumFixedOps = MCDesc.getNumOperands();
+ const auto NumOps = MI->getNumOperands();
+ if (NumOps == NumFixedOps)
+ return;
+
+ O << ' ';
+
+ // TODO: implement special printing for OpenCLExtInst::vstor*.
+ printRemainingVariableOps(MI, NumFixedOps, O, true);
}
void SPIRVInstPrinter::printOpDecorate(const MCInst *MI, raw_ostream &O) {
diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h
index 8da54a5d6e61..5a7f2e51afb8 100644
--- a/llvm/lib/Target/SPIRV/SPIRV.h
+++ b/llvm/lib/Target/SPIRV/SPIRV.h
@@ -19,6 +19,7 @@ class SPIRVSubtarget;
class InstructionSelector;
class RegisterBankInfo;
+ModulePass *createSPIRVPrepareFunctionsPass();
FunctionPass *createSPIRVPreLegalizerPass();
FunctionPass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM);
InstructionSelector *
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index 605bf949187f..6d60bd5e3c97 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -21,6 +21,7 @@
#include "SPIRVUtils.h"
#include "TargetInfo/SPIRVTargetInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -58,9 +59,14 @@ public:
void outputModuleSection(SPIRV::ModuleSectionType MSType);
void outputEntryPoints();
void outputDebugSourceAndStrings(const Module &M);
+ void outputOpExtInstImports(const Module &M);
void outputOpMemoryModel();
void outputOpFunctionEnd();
void outputExtFuncDecls();
+ void outputExecutionModeFromMDNode(Register Reg, MDNode *Node,
+ SPIRV::ExecutionMode EM);
+ void outputExecutionMode(const Module &M);
+ void outputAnnotations(const Module &M);
void outputModuleSections();
void emitInstruction(const MachineInstr *MI) override;
@@ -127,6 +133,8 @@ void SPIRVAsmPrinter::emitFunctionBodyEnd() {
}
void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) {
+ if (MAI->MBBsToSkip.contains(&MBB))
+ return;
MCInst LabelInst;
LabelInst.setOpcode(SPIRV::OpLabel);
LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB)));
@@ -237,6 +245,13 @@ void SPIRVAsmPrinter::outputModuleSection(SPIRV::ModuleSectionType MSType) {
}
void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) {
+ // Output OpSourceExtensions.
+ for (auto &Str : MAI->SrcExt) {
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpSourceExtension);
+ addStringImm(Str.first(), Inst);
+ outputMCInst(Inst);
+ }
// Output OpSource.
MCInst Inst;
Inst.setOpcode(SPIRV::OpSource);
@@ -246,6 +261,19 @@ void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) {
outputMCInst(Inst);
}
+void SPIRVAsmPrinter::outputOpExtInstImports(const Module &M) {
+ for (auto &CU : MAI->ExtInstSetMap) {
+ unsigned Set = CU.first;
+ Register Reg = CU.second;
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpExtInstImport);
+ Inst.addOperand(MCOperand::createReg(Reg));
+ addStringImm(getExtInstSetName(static_cast<SPIRV::InstructionSet>(Set)),
+ Inst);
+ outputMCInst(Inst);
+ }
+}
+
void SPIRVAsmPrinter::outputOpMemoryModel() {
MCInst Inst;
Inst.setOpcode(SPIRV::OpMemoryModel);
@@ -301,6 +329,135 @@ void SPIRVAsmPrinter::outputExtFuncDecls() {
}
}
+// Encode LLVM type by SPIR-V execution mode VecTypeHint.
+static unsigned encodeVecTypeHint(Type *Ty) {
+ if (Ty->isHalfTy())
+ return 4;
+ if (Ty->isFloatTy())
+ return 5;
+ if (Ty->isDoubleTy())
+ return 6;
+ if (IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) {
+ switch (IntTy->getIntegerBitWidth()) {
+ case 8:
+ return 0;
+ case 16:
+ return 1;
+ case 32:
+ return 2;
+ case 64:
+ return 3;
+ default:
+ llvm_unreachable("invalid integer type");
+ }
+ }
+ if (FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty)) {
+ Type *EleTy = VecTy->getElementType();
+ unsigned Size = VecTy->getNumElements();
+ return Size << 16 | encodeVecTypeHint(EleTy);
+ }
+ llvm_unreachable("invalid type");
+}
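
The encoding produced by encodeVecTypeHint packs the element count above bit 16 and the scalar type code in the low bits; for example a <4 x float> hint encodes as (4 << 16) | 5. A trivial standalone check (illustrative C++ only, not part of the patch):

    #include <cassert>

    int main() {
      unsigned FloatCode = 5;     // float, per encodeVecTypeHint above
      unsigned NumElements = 4;
      unsigned Encoded = NumElements << 16 | FloatCode;
      assert(Encoded == 0x40005u);
      return 0;
    }
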
+
+static void addOpsFromMDNode(MDNode *MDN, MCInst &Inst,
+ SPIRV::ModuleAnalysisInfo *MAI) {
+ for (const MDOperand &MDOp : MDN->operands()) {
+ if (auto *CMeta = dyn_cast<ConstantAsMetadata>(MDOp)) {
+ Constant *C = CMeta->getValue();
+ if (ConstantInt *Const = dyn_cast<ConstantInt>(C)) {
+ Inst.addOperand(MCOperand::createImm(Const->getZExtValue()));
+ } else if (auto *CE = dyn_cast<Function>(C)) {
+ Register FuncReg = MAI->getFuncReg(CE->getName().str());
+ assert(FuncReg.isValid());
+ Inst.addOperand(MCOperand::createReg(FuncReg));
+ }
+ }
+ }
+}
+
+void SPIRVAsmPrinter::outputExecutionModeFromMDNode(Register Reg, MDNode *Node,
+ SPIRV::ExecutionMode EM) {
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpExecutionMode);
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createImm(static_cast<unsigned>(EM)));
+ addOpsFromMDNode(Node, Inst, MAI);
+ outputMCInst(Inst);
+}
+
+void SPIRVAsmPrinter::outputExecutionMode(const Module &M) {
+ NamedMDNode *Node = M.getNamedMetadata("spirv.ExecutionMode");
+ if (Node) {
+ for (unsigned i = 0; i < Node->getNumOperands(); i++) {
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpExecutionMode);
+ addOpsFromMDNode(cast<MDNode>(Node->getOperand(i)), Inst, MAI);
+ outputMCInst(Inst);
+ }
+ }
+ for (auto FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ const Function &F = *FI;
+ if (F.isDeclaration())
+ continue;
+ Register FReg = MAI->getFuncReg(F.getGlobalIdentifier());
+ assert(FReg.isValid());
+ if (MDNode *Node = F.getMetadata("reqd_work_group_size"))
+ outputExecutionModeFromMDNode(FReg, Node,
+ SPIRV::ExecutionMode::LocalSize);
+ if (MDNode *Node = F.getMetadata("work_group_size_hint"))
+ outputExecutionModeFromMDNode(FReg, Node,
+ SPIRV::ExecutionMode::LocalSizeHint);
+ if (MDNode *Node = F.getMetadata("intel_reqd_sub_group_size"))
+ outputExecutionModeFromMDNode(FReg, Node,
+ SPIRV::ExecutionMode::SubgroupSize);
+ if (MDNode *Node = F.getMetadata("vec_type_hint")) {
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpExecutionMode);
+ Inst.addOperand(MCOperand::createReg(FReg));
+ unsigned EM = static_cast<unsigned>(SPIRV::ExecutionMode::VecTypeHint);
+ Inst.addOperand(MCOperand::createImm(EM));
+ unsigned TypeCode = encodeVecTypeHint(getMDOperandAsType(Node, 0));
+ Inst.addOperand(MCOperand::createImm(TypeCode));
+ outputMCInst(Inst);
+ }
+ }
+}
+
+void SPIRVAsmPrinter::outputAnnotations(const Module &M) {
+ outputModuleSection(SPIRV::MB_Annotations);
+ // Process llvm.global.annotations special global variable.
+ for (auto F = M.global_begin(), E = M.global_end(); F != E; ++F) {
+ if ((*F).getName() != "llvm.global.annotations")
+ continue;
+ const GlobalVariable *V = &(*F);
+ const ConstantArray *CA = cast<ConstantArray>(V->getOperand(0));
+ for (Value *Op : CA->operands()) {
+ ConstantStruct *CS = cast<ConstantStruct>(Op);
+ // The first field of the struct contains a pointer to
+ // the annotated variable.
+ Value *AnnotatedVar = CS->getOperand(0)->stripPointerCasts();
+ if (!isa<Function>(AnnotatedVar))
+ llvm_unreachable("Unsupported value in llvm.global.annotations");
+ Function *Func = cast<Function>(AnnotatedVar);
+ Register Reg = MAI->getFuncReg(Func->getGlobalIdentifier());
+
+ // The second field contains a pointer to a global annotation string.
+ GlobalVariable *GV =
+ cast<GlobalVariable>(CS->getOperand(1)->stripPointerCasts());
+
+ StringRef AnnotationString;
+ getConstantStringInfo(GV, AnnotationString);
+ MCInst Inst;
+ Inst.setOpcode(SPIRV::OpDecorate);
+ Inst.addOperand(MCOperand::createReg(Reg));
+ unsigned Dec = static_cast<unsigned>(SPIRV::Decoration::UserSemantic);
+ Inst.addOperand(MCOperand::createImm(Dec));
+ addStringImm(AnnotationString, Inst);
+ outputMCInst(Inst);
+ }
+ }
+}
+
void SPIRVAsmPrinter::outputModuleSections() {
const Module *M = MMI->getModule();
// Get the global subtarget to output module-level info.
@@ -311,13 +468,14 @@ void SPIRVAsmPrinter::outputModuleSections() {
// Output instructions according to the Logical Layout of a Module:
// TODO: 1,2. All OpCapability instructions, then optional OpExtension
// instructions.
- // TODO: 3. Optional OpExtInstImport instructions.
+ // 3. Optional OpExtInstImport instructions.
+ outputOpExtInstImports(*M);
// 4. The single required OpMemoryModel instruction.
outputOpMemoryModel();
// 5. All entry point declarations, using OpEntryPoint.
outputEntryPoints();
// 6. Execution-mode declarations, using OpExecutionMode or OpExecutionModeId.
- // TODO:
+ outputExecutionMode(*M);
// 7a. Debug: all OpString, OpSourceExtension, OpSource, and
// OpSourceContinued, without forward references.
outputDebugSourceAndStrings(*M);
@@ -326,7 +484,7 @@ void SPIRVAsmPrinter::outputModuleSections() {
// 7c. Debug: all OpModuleProcessed instructions.
outputModuleSection(SPIRV::MB_DebugModuleProcessed);
// 8. All annotation instructions (all decorations).
- outputModuleSection(SPIRV::MB_Annotations);
+ outputAnnotations(*M);
// 9. All type declarations (OpTypeXXX instructions), all constant
// instructions, and all global variable declarations. This section is
// the first section to allow use of: OpLine and OpNoLine debug information;
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 5b6b82aebf30..e8fedfeffde7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -24,9 +24,8 @@
using namespace llvm;
SPIRVCallLowering::SPIRVCallLowering(const SPIRVTargetLowering &TLI,
- const SPIRVSubtarget &ST,
SPIRVGlobalRegistry *GR)
- : CallLowering(&TLI), ST(ST), GR(GR) {}
+ : CallLowering(&TLI), GR(GR) {}
bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, ArrayRef<Register> VRegs,
@@ -36,11 +35,13 @@ bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
// TODO: handle the case of multiple registers.
if (VRegs.size() > 1)
return false;
- if (Val)
+ if (Val) {
+ const auto &STI = MIRBuilder.getMF().getSubtarget();
return MIRBuilder.buildInstr(SPIRV::OpReturnValue)
.addUse(VRegs[0])
- .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
- *ST.getRegBankInfo());
+ .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+ }
MIRBuilder.buildInstr(SPIRV::OpReturn);
return true;
}
@@ -63,6 +64,56 @@ static uint32_t getFunctionControl(const Function &F) {
return FuncControl;
}
+static ConstantInt *getConstInt(MDNode *MD, unsigned NumOp) {
+ if (MD->getNumOperands() > NumOp) {
+ auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(NumOp));
+ if (CMeta)
+ return dyn_cast<ConstantInt>(CMeta->getValue());
+ }
+ return nullptr;
+}
+
+// This code restores the function argument/return value types for composite
+// cases, because the final types should still be aggregates whereas they are
+// lowered to i32 during the translation to cope with aggregate flattening etc.
+static FunctionType *getOriginalFunctionType(const Function &F) {
+ auto *NamedMD = F.getParent()->getNamedMetadata("spv.cloned_funcs");
+ if (NamedMD == nullptr)
+ return F.getFunctionType();
+
+ Type *RetTy = F.getFunctionType()->getReturnType();
+ SmallVector<Type *, 4> ArgTypes;
+ for (auto &Arg : F.args())
+ ArgTypes.push_back(Arg.getType());
+
+ auto ThisFuncMDIt =
+ std::find_if(NamedMD->op_begin(), NamedMD->op_end(), [&F](MDNode *N) {
+ return isa<MDString>(N->getOperand(0)) &&
+ cast<MDString>(N->getOperand(0))->getString() == F.getName();
+ });
+ // TODO: probably one function can have numerous type mutations,
+ // so we should support this.
+ if (ThisFuncMDIt != NamedMD->op_end()) {
+ auto *ThisFuncMD = *ThisFuncMDIt;
+ MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1));
+ assert(MD && "MDNode operand is expected");
+ ConstantInt *Const = getConstInt(MD, 0);
+ if (Const) {
+ auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1));
+ assert(CMeta && "ConstantAsMetadata operand is expected");
+ assert(Const->getSExtValue() >= -1);
+ // Currently -1 indicates return value, greater values mean
+ // argument numbers.
+ if (Const->getSExtValue() == -1)
+ RetTy = CMeta->getType();
+ else
+ ArgTypes[Const->getSExtValue()] = CMeta->getType();
+ }
+ }
+
+ return FunctionType::get(RetTy, ArgTypes, F.isVarArg());
+}
+
bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<ArrayRef<Register>> VRegs,
@@ -71,7 +122,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
GR->setCurrentFunc(MIRBuilder.getMF());
// Assign types and names to all args, and store their types for later.
- SmallVector<Register, 4> ArgTypeVRegs;
+ FunctionType *FTy = getOriginalFunctionType(F);
+ SmallVector<SPIRVType *, 4> ArgTypeVRegs;
if (VRegs.size() > 0) {
unsigned i = 0;
for (const auto &Arg : F.args()) {
@@ -79,9 +131,18 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// TODO: handle the case of multiple registers.
if (VRegs[i].size() > 1)
return false;
- auto *SpirvTy =
- GR->assignTypeToVReg(Arg.getType(), VRegs[i][0], MIRBuilder);
- ArgTypeVRegs.push_back(GR->getSPIRVTypeID(SpirvTy));
+ Type *ArgTy = FTy->getParamType(i);
+ SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite;
+ MDNode *Node = F.getMetadata("kernel_arg_access_qual");
+ if (Node && i < Node->getNumOperands()) {
+ StringRef AQString = cast<MDString>(Node->getOperand(i))->getString();
+ if (AQString.compare("read_only") == 0)
+ AQ = SPIRV::AccessQualifier::ReadOnly;
+ else if (AQString.compare("write_only") == 0)
+ AQ = SPIRV::AccessQualifier::WriteOnly;
+ }
+ auto *SpirvTy = GR->assignTypeToVReg(ArgTy, VRegs[i][0], MIRBuilder, AQ);
+ ArgTypeVRegs.push_back(SpirvTy);
if (Arg.hasName())
buildOpName(VRegs[i][0], Arg.getName(), MIRBuilder);
@@ -92,8 +153,10 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SPIRV::Decoration::MaxByteOffset, {DerefBytes});
}
if (Arg.hasAttribute(Attribute::Alignment)) {
+ auto Alignment = static_cast<unsigned>(
+ Arg.getAttribute(Attribute::Alignment).getValueAsInt());
buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Alignment,
- {static_cast<unsigned>(Arg.getParamAlignment())});
+ {Alignment});
}
if (Arg.hasAttribute(Attribute::ReadOnly)) {
auto Attr =
@@ -107,6 +170,38 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
buildOpDecorate(VRegs[i][0], MIRBuilder,
SPIRV::Decoration::FuncParamAttr, {Attr});
}
+ if (Arg.hasAttribute(Attribute::NoAlias)) {
+ auto Attr =
+ static_cast<unsigned>(SPIRV::FunctionParameterAttribute::NoAlias);
+ buildOpDecorate(VRegs[i][0], MIRBuilder,
+ SPIRV::Decoration::FuncParamAttr, {Attr});
+ }
+ Node = F.getMetadata("kernel_arg_type_qual");
+ if (Node && i < Node->getNumOperands()) {
+ StringRef TypeQual = cast<MDString>(Node->getOperand(i))->getString();
+ if (TypeQual.compare("volatile") == 0)
+ buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Volatile,
+ {});
+ }
+ Node = F.getMetadata("spirv.ParameterDecorations");
+ if (Node && i < Node->getNumOperands() &&
+ isa<MDNode>(Node->getOperand(i))) {
+ MDNode *MD = cast<MDNode>(Node->getOperand(i));
+ for (const MDOperand &MDOp : MD->operands()) {
+ MDNode *MD2 = dyn_cast<MDNode>(MDOp);
+ assert(MD2 && "Metadata operand is expected");
+ ConstantInt *Const = getConstInt(MD2, 0);
+ assert(Const && "MDOperand should be ConstantInt");
+ auto Dec = static_cast<SPIRV::Decoration>(Const->getZExtValue());
+ std::vector<uint32_t> DecVec;
+ for (unsigned j = 1; j < MD2->getNumOperands(); j++) {
+ ConstantInt *Const = getConstInt(MD2, j);
+ assert(Const && "MDOperand should be ConstantInt");
+ DecVec.push_back(static_cast<uint32_t>(Const->getZExtValue()));
+ }
+ buildOpDecorate(VRegs[i][0], MIRBuilder, Dec, DecVec);
+ }
+ }
++i;
}
}
@@ -117,30 +212,30 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass);
if (F.isDeclaration())
GR->add(&F, &MIRBuilder.getMF(), FuncVReg);
-
- auto *FTy = F.getFunctionType();
- auto FuncTy = GR->assignTypeToVReg(FTy, FuncVReg, MIRBuilder);
+ SPIRVType *RetTy = GR->getOrCreateSPIRVType(FTy->getReturnType(), MIRBuilder);
+ SPIRVType *FuncTy = GR->getOrCreateOpTypeFunctionWithArgs(
+ FTy, RetTy, ArgTypeVRegs, MIRBuilder);
// Build the OpTypeFunction declaring it.
- Register ReturnTypeID = FuncTy->getOperand(1).getReg();
uint32_t FuncControl = getFunctionControl(F);
MIRBuilder.buildInstr(SPIRV::OpFunction)
.addDef(FuncVReg)
- .addUse(ReturnTypeID)
+ .addUse(GR->getSPIRVTypeID(RetTy))
.addImm(FuncControl)
.addUse(GR->getSPIRVTypeID(FuncTy));
// Add OpFunctionParameters.
- const unsigned NumArgs = ArgTypeVRegs.size();
- for (unsigned i = 0; i < NumArgs; ++i) {
+ int i = 0;
+ for (const auto &Arg : F.args()) {
assert(VRegs[i].size() == 1 && "Formal arg has multiple vregs");
MRI->setRegClass(VRegs[i][0], &SPIRV::IDRegClass);
MIRBuilder.buildInstr(SPIRV::OpFunctionParameter)
.addDef(VRegs[i][0])
- .addUse(ArgTypeVRegs[i]);
+ .addUse(GR->getSPIRVTypeID(ArgTypeVRegs[i]));
if (F.isDeclaration())
- GR->add(F.getArg(i), &MIRBuilder.getMF(), VRegs[i][0]);
+ GR->add(&Arg, &MIRBuilder.getMF(), VRegs[i][0]);
+ i++;
}
// Name the function.
if (F.hasName())
@@ -169,48 +264,51 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// TODO: handle the case of multiple registers.
if (Info.OrigRet.Regs.size() > 1)
return false;
+ MachineFunction &MF = MIRBuilder.getMF();
+ GR->setCurrentFunc(MF);
+ FunctionType *FTy = nullptr;
+ const Function *CF = nullptr;
- GR->setCurrentFunc(MIRBuilder.getMF());
- Register ResVReg =
- Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0];
// Emit a regular OpFunctionCall. If it's an externally declared function,
- // be sure to emit its type and function declaration here. It will be
- // hoisted globally later.
+ // be sure to emit its type and function declaration here. It will be hoisted
+ // globally later.
if (Info.Callee.isGlobal()) {
- auto *CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal());
+ CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal());
// TODO: support constexpr casts and indirect calls.
if (CF == nullptr)
return false;
- if (CF->isDeclaration()) {
- // Emit the type info and forward function declaration to the first MBB
- // to ensure VReg definition dependencies are valid across all MBBs.
- MachineBasicBlock::iterator OldII = MIRBuilder.getInsertPt();
- MachineBasicBlock &OldBB = MIRBuilder.getMBB();
- MachineBasicBlock &FirstBB = *MIRBuilder.getMF().getBlockNumbered(0);
- MIRBuilder.setInsertPt(FirstBB, FirstBB.instr_end());
-
- SmallVector<ArrayRef<Register>, 8> VRegArgs;
- SmallVector<SmallVector<Register, 1>, 8> ToInsert;
- for (const Argument &Arg : CF->args()) {
- if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero())
- continue; // Don't handle zero sized types.
- ToInsert.push_back({MIRBuilder.getMRI()->createGenericVirtualRegister(
- LLT::scalar(32))});
- VRegArgs.push_back(ToInsert.back());
- }
- // TODO: Reuse FunctionLoweringInfo.
- FunctionLoweringInfo FuncInfo;
- lowerFormalArguments(MIRBuilder, *CF, VRegArgs, FuncInfo);
- MIRBuilder.setInsertPt(OldBB, OldII);
+ FTy = getOriginalFunctionType(*CF);
+ }
+
+ Register ResVReg =
+ Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0];
+ if (CF && CF->isDeclaration() &&
+ !GR->find(CF, &MIRBuilder.getMF()).isValid()) {
+ // Emit the type info and forward function declaration to the first MBB
+ // to ensure VReg definition dependencies are valid across all MBBs.
+ MachineIRBuilder FirstBlockBuilder;
+ FirstBlockBuilder.setMF(MF);
+ FirstBlockBuilder.setMBB(*MF.getBlockNumbered(0));
+
+ SmallVector<ArrayRef<Register>, 8> VRegArgs;
+ SmallVector<SmallVector<Register, 1>, 8> ToInsert;
+ for (const Argument &Arg : CF->args()) {
+ if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero())
+ continue; // Don't handle zero sized types.
+ ToInsert.push_back(
+ {MIRBuilder.getMRI()->createGenericVirtualRegister(LLT::scalar(32))});
+ VRegArgs.push_back(ToInsert.back());
}
+ // TODO: Reuse FunctionLoweringInfo
+ FunctionLoweringInfo FuncInfo;
+ lowerFormalArguments(FirstBlockBuilder, *CF, VRegArgs, FuncInfo);
}
// Make sure there's a valid return reg, even for functions returning void.
- if (!ResVReg.isValid()) {
+ if (!ResVReg.isValid())
ResVReg = MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::IDRegClass);
- }
SPIRVType *RetType =
- GR->assignTypeToVReg(Info.OrigRet.Ty, ResVReg, MIRBuilder);
+ GR->assignTypeToVReg(FTy->getReturnType(), ResVReg, MIRBuilder);
// Emit the OpFunctionCall and its args.
auto MIB = MIRBuilder.buildInstr(SPIRV::OpFunctionCall)
@@ -224,6 +322,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
MIB.addUse(Arg.Regs[0]);
}
- return MIB.constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
- *ST.getRegBankInfo());
+ const auto &STI = MF.getSubtarget();
+ return MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.h b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h
index c179bb35154b..c2d6ad82d507 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.h
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.h
@@ -13,23 +13,21 @@
#ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
#define LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H
+#include "SPIRVGlobalRegistry.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
namespace llvm {
class SPIRVGlobalRegistry;
-class SPIRVSubtarget;
class SPIRVTargetLowering;
class SPIRVCallLowering : public CallLowering {
private:
- const SPIRVSubtarget &ST;
// Used to create and assign function, argument, and return type information.
SPIRVGlobalRegistry *GR;
public:
- SPIRVCallLowering(const SPIRVTargetLowering &TLI, const SPIRVSubtarget &ST,
- SPIRVGlobalRegistry *GR);
+ SPIRVCallLowering(const SPIRVTargetLowering &TLI, SPIRVGlobalRegistry *GR);
// Builds OpReturn or OpReturnValue.
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
index 57cd4bafd351..1926977ea66e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp
@@ -92,4 +92,4 @@ void SPIRVGeneralDuplicatesTracker::buildDepsGraph(
}
}
}
-} \ No newline at end of file
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
index 58ae1f86ce42..ab22c3d2a647 100644
--- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
+++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h
@@ -169,6 +169,8 @@ public:
Register find(const Argument *Arg, const MachineFunction *MF) {
return AT.find(const_cast<Argument *>(Arg), MF);
}
+
+ const SPIRVDuplicatesTracker<Type> *getTypes() { return &TT; }
};
} // namespace llvm
-#endif \ No newline at end of file
+#endif // LLVM_LIB_TARGET_SPIRV_SPIRVDUPLICATESTRACKER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 9624482e3622..0075f547b6d6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -87,6 +87,7 @@ public:
Instruction *visitLoadInst(LoadInst &I);
Instruction *visitStoreInst(StoreInst &I);
Instruction *visitAllocaInst(AllocaInst &I);
+ Instruction *visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
bool runOnFunction(Function &F) override;
};
} // namespace
@@ -103,7 +104,7 @@ static inline bool isAssignTypeInstr(const Instruction *I) {
static bool isMemInstrToReplace(Instruction *I) {
return isa<StoreInst>(I) || isa<LoadInst>(I) || isa<InsertValueInst>(I) ||
- isa<ExtractValueInst>(I);
+ isa<ExtractValueInst>(I) || isa<AtomicCmpXchgInst>(I);
}
static bool isAggrToReplace(const Value *V) {
@@ -134,13 +135,14 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
Instruction *New) {
while (!Old->user_empty()) {
auto *U = Old->user_back();
- if (isMemInstrToReplace(U) || isa<ReturnInst>(U)) {
- U->replaceUsesOfWith(Old, New);
- } else if (isAssignTypeInstr(U)) {
+ if (isAssignTypeInstr(U)) {
IRB->SetInsertPoint(U);
SmallVector<Value *, 2> Args = {New, U->getOperand(1)};
IRB->CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args);
U->eraseFromParent();
+ } else if (isMemInstrToReplace(U) || isa<ReturnInst>(U) ||
+ isa<CallInst>(U)) {
+ U->replaceUsesOfWith(Old, New);
} else {
llvm_unreachable("illegal aggregate intrinsic user");
}
@@ -301,10 +303,10 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) {
MachineMemOperand::Flags Flags =
TLI->getStoreMemOperandFlags(I, F->getParent()->getDataLayout());
auto *PtrOp = I.getPointerOperand();
- auto *NewI =
- IRB->CreateIntrinsic(Intrinsic::spv_store, {PtrOp->getType()},
- {I.getValueOperand(), PtrOp, IRB->getInt16(Flags),
- IRB->getInt8(I.getAlign().value())});
+ auto *NewI = IRB->CreateIntrinsic(
+ Intrinsic::spv_store, {I.getValueOperand()->getType(), PtrOp->getType()},
+ {I.getValueOperand(), PtrOp, IRB->getInt16(Flags),
+ IRB->getInt8(I.getAlign().value())});
I.eraseFromParent();
return NewI;
}
@@ -314,6 +316,22 @@ Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) {
return &I;
}
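+// Lower atomic cmpxchg to the spv_cmpxchg intrinsic with the operands
+// (ptr, cmp, val, scope, success-semantics, failure-semantics); the
+// {value, success} aggregate result is rebuilt during instruction selection.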
+Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ assert(I.getType()->isAggregateType() && "Aggregate result is expected");
+ SmallVector<Value *> Args;
+ for (auto &Op : I.operands())
+ Args.push_back(Op);
+ Args.push_back(IRB->getInt32(I.getSyncScopeID()));
+ Args.push_back(IRB->getInt32(
+ static_cast<uint32_t>(getMemSemantics(I.getSuccessOrdering()))));
+ Args.push_back(IRB->getInt32(
+ static_cast<uint32_t>(getMemSemantics(I.getFailureOrdering()))));
+ auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_cmpxchg,
+ {I.getPointerOperand()->getType()}, {Args});
+ replaceMemInstrUses(&I, NewI);
+ return NewI;
+}
+
void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV) {
// Skip special artificial variable llvm.global.annotations.
if (GV.getName() == "llvm.global.annotations")
@@ -351,14 +369,13 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
// Check GetElementPtrConstantExpr case.
(isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) {
IRB->SetInsertPoint(I);
- buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op);
+ if (isa<UndefValue>(Op) && Op->getType()->isAggregateType())
+ buildIntrWithMD(Intrinsic::spv_assign_type, {IRB->getInt32Ty()}, Op,
+ UndefValue::get(IRB->getInt32Ty()));
+ else
+ buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op);
}
}
- // StoreInst's operand type can be changed in the next stage so we need to
- // store it in the set.
- if (isa<StoreInst>(I) &&
- cast<StoreInst>(I)->getValueOperand()->getType()->isAggregateType())
- AggrStores.insert(I);
}
void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) {
@@ -378,7 +395,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) {
if ((isa<ConstantAggregateZero>(Op) && Op->getType()->isVectorTy()) ||
isa<PHINode>(I) || isa<SwitchInst>(I))
TrackConstants = false;
- if (isa<ConstantData>(Op) && TrackConstants) {
+ if ((isa<ConstantData>(Op) || isa<ConstantExpr>(Op)) && TrackConstants) {
unsigned OpNo = Op.getOperandNo();
if (II && ((II->getIntrinsicID() == Intrinsic::spv_gep && OpNo == 0) ||
(II->paramHasAttr(OpNo, Attribute::ImmArg))))
@@ -405,8 +422,20 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
AggrConsts.clear();
AggrStores.clear();
- IRB->SetInsertPoint(&Func.getEntryBlock().front());
+ // A StoreInst's operand type can be changed by later transformations, so we
+ // need to record such stores in the set. Also record stores whose types have
+ // already been transformed.
+ for (auto &I : instructions(Func)) {
+ StoreInst *SI = dyn_cast<StoreInst>(&I);
+ if (!SI)
+ continue;
+ Type *ElTy = SI->getValueOperand()->getType();
+ PointerType *PTy = cast<PointerType>(SI->getOperand(1)->getType());
+ if (ElTy->isAggregateType() || ElTy->isVectorTy() ||
+ !PTy->isOpaqueOrPointeeTypeMatches(ElTy))
+ AggrStores.insert(&I);
+ }
+ IRB->SetInsertPoint(&Func.getEntryBlock().front());
for (auto &GV : Func.getParent()->globals())
processGlobalValue(GV);
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 5f890c003cbc..5c8fa7adfbdf 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -24,6 +24,24 @@ using namespace llvm;
SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize)
: PointerSize(PointerSize) {}
+SPIRVType *SPIRVGlobalRegistry::assignIntTypeToVReg(unsigned BitWidth,
+ Register VReg,
+ MachineInstr &I,
+ const SPIRVInstrInfo &TII) {
+ SPIRVType *SpirvType = getOrCreateSPIRVIntegerType(BitWidth, I, TII);
+ assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF);
+ return SpirvType;
+}
+
+SPIRVType *SPIRVGlobalRegistry::assignVectTypeToVReg(
+ SPIRVType *BaseType, unsigned NumElements, Register VReg, MachineInstr &I,
+ const SPIRVInstrInfo &TII) {
+ SPIRVType *SpirvType =
+ getOrCreateSPIRVVectorType(BaseType, NumElements, I, TII);
+ assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF);
+ return SpirvType;
+}
+
SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg(
const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier AccessQual, bool EmitIR) {
@@ -96,6 +114,65 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeVector(uint32_t NumElems,
return MIB;
}
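+// Finds (or reserves) the VReg that will hold the integer constant Val of the
+// given SPIR-V type, defaulting to i32 when SpvType is null. Returns the
+// register, the matching LLVM ConstantInt, and whether the register was newly
+// created, in which case the caller still has to emit its defining OpConstant.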
+std::tuple<Register, ConstantInt *, bool>
+SPIRVGlobalRegistry::getOrCreateConstIntReg(uint64_t Val, SPIRVType *SpvType,
+ MachineIRBuilder *MIRBuilder,
+ MachineInstr *I,
+ const SPIRVInstrInfo *TII) {
+ const IntegerType *LLVMIntTy;
+ if (SpvType)
+ LLVMIntTy = cast<IntegerType>(getTypeForSPIRVType(SpvType));
+ else
+ LLVMIntTy = IntegerType::getInt32Ty(CurMF->getFunction().getContext());
+ bool NewInstr = false;
+ // Find a constant in DT or build a new one.
+ ConstantInt *CI = ConstantInt::get(const_cast<IntegerType *>(LLVMIntTy), Val);
+ Register Res = DT.find(CI, CurMF);
+ if (!Res.isValid()) {
+ unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
+ LLT LLTy = LLT::scalar(32);
+ Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ if (MIRBuilder)
+ assignTypeToVReg(LLVMIntTy, Res, *MIRBuilder);
+ else
+ assignIntTypeToVReg(BitWidth, Res, *I, *TII);
+ DT.add(CI, CurMF, Res);
+ NewInstr = true;
+ }
+ return std::make_tuple(Res, CI, NewInstr);
+}
+
+Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I,
+ SPIRVType *SpvType,
+ const SPIRVInstrInfo &TII) {
+ assert(SpvType);
+ ConstantInt *CI;
+ Register Res;
+ bool New;
+ std::tie(Res, CI, New) =
+ getOrCreateConstIntReg(Val, SpvType, nullptr, &I, &TII);
+ // If the found Res register is the one defined by the passed G_CONSTANT
+ // machine instruction, a new constant instruction still has to be created.
+ if (!New && (!I.getOperand(0).isReg() || Res != I.getOperand(0).getReg()))
+ return Res;
+ MachineInstrBuilder MIB;
+ MachineBasicBlock &BB = *I.getParent();
+ if (Val) {
+ MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
+ .addDef(Res)
+ .addUse(getSPIRVTypeID(SpvType));
+ addNumImm(APInt(getScalarOrVectorBitWidth(SpvType), Val), MIB);
+ } else {
+ MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
+ .addDef(Res)
+ .addUse(getSPIRVTypeID(SpvType));
+ }
+ const auto &ST = CurMF->getSubtarget();
+ constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(),
+ *ST.getRegisterInfo(), *ST.getRegBankInfo());
+ return Res;
+}
+
Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val,
MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType,
@@ -112,14 +189,32 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val,
Register Res = DT.find(ConstInt, &MF);
if (!Res.isValid()) {
unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
- Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
- assignTypeToVReg(LLVMIntTy, Res, MIRBuilder);
- if (EmitIR)
+ LLT LLTy = LLT::scalar(EmitIR ? BitWidth : 32);
+ Res = MF.getRegInfo().createGenericVirtualRegister(LLTy);
+ assignTypeToVReg(LLVMIntTy, Res, MIRBuilder,
+ SPIRV::AccessQualifier::ReadWrite, EmitIR);
+ DT.add(ConstInt, &MIRBuilder.getMF(), Res);
+ if (EmitIR) {
MIRBuilder.buildConstant(Res, *ConstInt);
- else
- MIRBuilder.buildInstr(SPIRV::OpConstantI)
- .addDef(Res)
- .addImm(ConstInt->getSExtValue());
+ } else {
+ MachineInstrBuilder MIB;
+ if (Val) {
+ assert(SpvType);
+ MIB = MIRBuilder.buildInstr(SPIRV::OpConstantI)
+ .addDef(Res)
+ .addUse(getSPIRVTypeID(SpvType));
+ addNumImm(APInt(BitWidth, Val), MIB);
+ } else {
+ assert(SpvType);
+ MIB = MIRBuilder.buildInstr(SPIRV::OpConstantNull)
+ .addDef(Res)
+ .addUse(getSPIRVTypeID(SpvType));
+ }
+ const auto &Subtarget = CurMF->getSubtarget();
+ constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(),
+ *Subtarget.getRegisterInfo(),
+ *Subtarget.getRegBankInfo());
+ }
}
return Res;
}
@@ -142,11 +237,63 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
assignTypeToVReg(LLVMFPTy, Res, MIRBuilder);
+ DT.add(ConstFP, &MF, Res);
MIRBuilder.buildFConstant(Res, *ConstFP);
}
return Res;
}
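+// Finds in DT, or builds, an integer vector constant splatting Val across all
+// elements of SpvType; a zero splat is emitted as OpConstantNull rather than
+// an OpConstantComposite of zeros.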
+Register
+SPIRVGlobalRegistry::getOrCreateConsIntVector(uint64_t Val, MachineInstr &I,
+ SPIRVType *SpvType,
+ const SPIRVInstrInfo &TII) {
+ const Type *LLVMTy = getTypeForSPIRVType(SpvType);
+ assert(LLVMTy->isVectorTy());
+ const FixedVectorType *LLVMVecTy = cast<FixedVectorType>(LLVMTy);
+ Type *LLVMBaseTy = LLVMVecTy->getElementType();
+ // Find a constant vector in DT or build a new one.
+ const auto ConstInt = ConstantInt::get(LLVMBaseTy, Val);
+ auto ConstVec =
+ ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstInt);
+ Register Res = DT.find(ConstVec, CurMF);
+ if (!Res.isValid()) {
+ unsigned BitWidth = getScalarOrVectorBitWidth(SpvType);
+ SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII);
+ // SpvScalConst should be created before SpvVecConst to avoid an
+ // undefined-ID error during validation.
+ // TODO: this can be moved below once sorting of types/consts/defs is
+ // implemented.
+ Register SpvScalConst;
+ if (Val)
+ SpvScalConst = getOrCreateConstInt(Val, I, SpvBaseType, TII);
+ // TODO: maybe use bitwidth of base type.
+ LLT LLTy = LLT::scalar(32);
+ Register SpvVecConst =
+ CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ const unsigned ElemCnt = SpvType->getOperand(2).getImm();
+ assignVectTypeToVReg(SpvBaseType, ElemCnt, SpvVecConst, I, TII);
+ DT.add(ConstVec, CurMF, SpvVecConst);
+ MachineInstrBuilder MIB;
+ MachineBasicBlock &BB = *I.getParent();
+ if (Val) {
+ MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantComposite))
+ .addDef(SpvVecConst)
+ .addUse(getSPIRVTypeID(SpvType));
+ for (unsigned i = 0; i < ElemCnt; ++i)
+ MIB.addUse(SpvScalConst);
+ } else {
+ MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
+ .addDef(SpvVecConst)
+ .addUse(getSPIRVTypeID(SpvType));
+ }
+ const auto &Subtarget = CurMF->getSubtarget();
+ constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(),
+ *Subtarget.getRegisterInfo(),
+ *Subtarget.getRegBankInfo());
+ return SpvVecConst;
+ }
+ return Res;
+}
+
Register SPIRVGlobalRegistry::buildGlobalVariable(
Register ResVReg, SPIRVType *BaseType, StringRef Name,
const GlobalValue *GV, SPIRV::StorageClass Storage,
@@ -169,7 +316,13 @@ Register SPIRVGlobalRegistry::buildGlobalVariable(
}
GV = GVar;
}
- Register Reg;
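+ // If this global has already been materialized in the current function,
+ // reuse its register (copying into ResVReg if needed) instead of emitting a
+ // second OpVariable.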
+ Register Reg = DT.find(GVar, &MIRBuilder.getMF());
+ if (Reg.isValid()) {
+ if (Reg != ResVReg)
+ MIRBuilder.buildCopy(ResVReg, Reg);
+ return ResVReg;
+ }
+
auto MIB = MIRBuilder.buildInstr(SPIRV::OpVariable)
.addDef(ResVReg)
.addUse(getSPIRVTypeID(BaseType))
@@ -234,14 +387,76 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems,
return MIB;
}
+SPIRVType *SPIRVGlobalRegistry::getOpTypeOpaque(const StructType *Ty,
+ MachineIRBuilder &MIRBuilder) {
+ assert(Ty->hasName());
+ const StringRef Name = Ty->hasName() ? Ty->getName() : "";
+ Register ResVReg = createTypeVReg(MIRBuilder);
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeOpaque).addDef(ResVReg);
+ addStringImm(Name, MIB);
+ buildOpName(ResVReg, Name, MIRBuilder);
+ return MIB;
+}
+
+SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(const StructType *Ty,
+ MachineIRBuilder &MIRBuilder,
+ bool EmitIR) {
+ SmallVector<Register, 4> FieldTypes;
+ for (const auto &Elem : Ty->elements()) {
+ SPIRVType *ElemTy = findSPIRVType(Elem, MIRBuilder);
+ assert(ElemTy && ElemTy->getOpcode() != SPIRV::OpTypeVoid &&
+ "Invalid struct element type");
+ FieldTypes.push_back(getSPIRVTypeID(ElemTy));
+ }
+ Register ResVReg = createTypeVReg(MIRBuilder);
+ auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeStruct).addDef(ResVReg);
+ for (const auto &Ty : FieldTypes)
+ MIB.addUse(Ty);
+ if (Ty->hasName())
+ buildOpName(ResVReg, Ty->getName(), MIRBuilder);
+ if (Ty->isPacked())
+ buildOpDecorate(ResVReg, MIRBuilder, SPIRV::Decoration::CPacked, {});
+ return MIB;
+}
+
+static bool isOpenCLBuiltinType(const StructType *SType) {
+ return SType->isOpaque() && SType->hasName() &&
+ SType->getName().startswith("opencl.");
+}
+
+static bool isSPIRVBuiltinType(const StructType *SType) {
+ return SType->isOpaque() && SType->hasName() &&
+ SType->getName().startswith("spirv.");
+}
+
+static bool isSpecialType(const Type *Ty) {
+ if (auto PType = dyn_cast<PointerType>(Ty)) {
+ if (!PType->isOpaque())
+ Ty = PType->getNonOpaquePointerElementType();
+ }
+ if (auto SType = dyn_cast<StructType>(Ty))
+ return isOpenCLBuiltinType(SType) || isSPIRVBuiltinType(SType);
+ return false;
+}
+
SPIRVType *SPIRVGlobalRegistry::getOpTypePointer(SPIRV::StorageClass SC,
SPIRVType *ElemType,
- MachineIRBuilder &MIRBuilder) {
- auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypePointer)
- .addDef(createTypeVReg(MIRBuilder))
- .addImm(static_cast<uint32_t>(SC))
- .addUse(getSPIRVTypeID(ElemType));
- return MIB;
+ MachineIRBuilder &MIRBuilder,
+ Register Reg) {
+ if (!Reg.isValid())
+ Reg = createTypeVReg(MIRBuilder);
+ return MIRBuilder.buildInstr(SPIRV::OpTypePointer)
+ .addDef(Reg)
+ .addImm(static_cast<uint32_t>(SC))
+ .addUse(getSPIRVTypeID(ElemType));
+}
+
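+// Note that OpTypeForwardPointer has no result id of its own: its first
+// operand is the id that the eventual OpTypePointer will define, which is why
+// getSPIRVTypeID special-cases this opcode.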
+SPIRVType *
+SPIRVGlobalRegistry::getOpTypeForwardPointer(SPIRV::StorageClass SC,
+ MachineIRBuilder &MIRBuilder) {
+ return MIRBuilder.buildInstr(SPIRV::OpTypeForwardPointer)
+ .addUse(createTypeVReg(MIRBuilder))
+ .addImm(static_cast<uint32_t>(SC));
}
SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction(
@@ -255,10 +470,49 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction(
return MIB;
}
+SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeFunctionWithArgs(
+ const Type *Ty, SPIRVType *RetType,
+ const SmallVectorImpl<SPIRVType *> &ArgTypes,
+ MachineIRBuilder &MIRBuilder) {
+ Register Reg = DT.find(Ty, &MIRBuilder.getMF());
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
+ SPIRVType *SpirvType = getOpTypeFunction(RetType, ArgTypes, MIRBuilder);
+ return finishCreatingSPIRVType(Ty, SpirvType);
+}
+
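+// Helper used while a type is being constructed: returns the SPIR-V type
+// already registered in DT, a pending OpTypeForwardPointer if one was created
+// for this type, or otherwise recursively creates the type.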
+SPIRVType *SPIRVGlobalRegistry::findSPIRVType(const Type *Ty,
+ MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AccQual,
+ bool EmitIR) {
+ Register Reg = DT.find(Ty, &MIRBuilder.getMF());
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
+ if (ForwardPointerTypes.find(Ty) != ForwardPointerTypes.end())
+ return ForwardPointerTypes[Ty];
+ return restOfCreateSPIRVType(Ty, MIRBuilder, AccQual, EmitIR);
+}
+
+Register SPIRVGlobalRegistry::getSPIRVTypeID(const SPIRVType *SpirvType) const {
+ assert(SpirvType && "Attempting to get type id for nullptr type.");
+ if (SpirvType->getOpcode() == SPIRV::OpTypeForwardPointer)
+ return SpirvType->uses().begin()->getReg();
+ return SpirvType->defs().begin()->getReg();
+}
+
SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty,
MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier AccQual,
bool EmitIR) {
+ assert(!isSpecialType(Ty));
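+ // Before creating a new type, check whether this LLVM type has already been
+ // translated for the current machine function and reuse it if so.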
+ auto &TypeToSPIRVTypeMap = DT.getTypes()->getAllUses();
+ auto t = TypeToSPIRVTypeMap.find(Ty);
+ if (t != TypeToSPIRVTypeMap.end()) {
+ auto tt = t->second.find(&MIRBuilder.getMF());
+ if (tt != t->second.end())
+ return getSPIRVTypeForVReg(tt->second);
+ }
+
if (auto IType = dyn_cast<IntegerType>(Ty)) {
const unsigned Width = IType->getBitWidth();
return Width == 1 ? getOpTypeBool(MIRBuilder)
@@ -269,21 +523,25 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty,
if (Ty->isVoidTy())
return getOpTypeVoid(MIRBuilder);
if (Ty->isVectorTy()) {
- auto El = getOrCreateSPIRVType(cast<FixedVectorType>(Ty)->getElementType(),
- MIRBuilder);
+ SPIRVType *El =
+ findSPIRVType(cast<FixedVectorType>(Ty)->getElementType(), MIRBuilder);
return getOpTypeVector(cast<FixedVectorType>(Ty)->getNumElements(), El,
MIRBuilder);
}
if (Ty->isArrayTy()) {
- auto *El = getOrCreateSPIRVType(Ty->getArrayElementType(), MIRBuilder);
+ SPIRVType *El = findSPIRVType(Ty->getArrayElementType(), MIRBuilder);
return getOpTypeArray(Ty->getArrayNumElements(), El, MIRBuilder, EmitIR);
}
- assert(!isa<StructType>(Ty) && "Unsupported StructType");
+ if (auto SType = dyn_cast<StructType>(Ty)) {
+ if (SType->isOpaque())
+ return getOpTypeOpaque(SType, MIRBuilder);
+ return getOpTypeStruct(SType, MIRBuilder, EmitIR);
+ }
if (auto FType = dyn_cast<FunctionType>(Ty)) {
- SPIRVType *RetTy = getOrCreateSPIRVType(FType->getReturnType(), MIRBuilder);
+ SPIRVType *RetTy = findSPIRVType(FType->getReturnType(), MIRBuilder);
SmallVector<SPIRVType *, 4> ParamTypes;
for (const auto &t : FType->params()) {
- ParamTypes.push_back(getOrCreateSPIRVType(t, MIRBuilder));
+ ParamTypes.push_back(findSPIRVType(t, MIRBuilder));
}
return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder);
}
@@ -292,24 +550,51 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty,
// At the moment, all opaque pointers correspond to i8 element type.
// TODO: change the implementation once opaque pointers are supported
// in the SPIR-V specification.
- if (PType->isOpaque()) {
+ if (PType->isOpaque())
SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder);
- } else {
- Type *ElemType = PType->getNonOpaquePointerElementType();
- // TODO: support OpenCL and SPIRV builtins like image2d_t that are passed
- // as pointers, but should be treated as custom types like OpTypeImage.
- assert(!isa<StructType>(ElemType) && "Unsupported StructType pointer");
-
- // Otherwise, treat it as a regular pointer type.
- SpvElementType = getOrCreateSPIRVType(
- ElemType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, EmitIR);
- }
+ else
+ SpvElementType =
+ findSPIRVType(PType->getNonOpaquePointerElementType(), MIRBuilder,
+ SPIRV::AccessQualifier::ReadWrite, EmitIR);
auto SC = addressSpaceToStorageClass(PType->getAddressSpace());
- return getOpTypePointer(SC, SpvElementType, MIRBuilder);
+ // A null element type means we have hit a loop in the type definitions;
+ // make and return the corresponding OpTypeForwardPointer.
+ if (SpvElementType == nullptr) {
+ if (ForwardPointerTypes.find(Ty) == ForwardPointerTypes.end())
+ ForwardPointerTypes[PType] = getOpTypeForwardPointer(SC, MIRBuilder);
+ return ForwardPointerTypes[PType];
+ }
+ Register Reg(0);
+ // If a forward pointer is associated with this type, reuse its register
+ // operand when creating the OpTypePointer.
+ if (ForwardPointerTypes.find(PType) != ForwardPointerTypes.end())
+ Reg = getSPIRVTypeID(ForwardPointerTypes[PType]);
+
+ return getOpTypePointer(SC, SpvElementType, MIRBuilder, Reg);
}
llvm_unreachable("Unable to convert LLVM type to SPIRVType");
}
+SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
+ const Type *Ty, MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AccessQual, bool EmitIR) {
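+ // Returning nullptr for a non-pointer type that is already being processed
+ // signals a cycle; the pointer handling in createSPIRVType then falls back
+ // to an OpTypeForwardPointer.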
+ if (TypesInProcessing.count(Ty) && !Ty->isPointerTy())
+ return nullptr;
+ TypesInProcessing.insert(Ty);
+ SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR);
+ TypesInProcessing.erase(Ty);
+ VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType;
+ SPIRVToLLVMType[SpirvType] = Ty;
+ Register Reg = DT.find(Ty, &MIRBuilder.getMF());
+ // Do not add OpTypeForwardPointer to DT; the corresponding normal pointer
+ // type will be added later. Special types have already been added to DT.
+ if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() &&
+ !isSpecialType(Ty))
+ DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType));
+
+ return SpirvType;
+}
+
SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const {
auto t = VRegToTypeMap.find(CurMF);
if (t != VRegToTypeMap.end()) {
@@ -321,13 +606,26 @@ SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const {
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
- const Type *Type, MachineIRBuilder &MIRBuilder,
+ const Type *Ty, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier AccessQual, bool EmitIR) {
- Register Reg = DT.find(Type, &MIRBuilder.getMF());
+ Register Reg = DT.find(Ty, &MIRBuilder.getMF());
if (Reg.isValid())
return getSPIRVTypeForVReg(Reg);
- SPIRVType *SpirvType = createSPIRVType(Type, MIRBuilder, AccessQual, EmitIR);
- return restOfCreateSPIRVType(Type, SpirvType);
+ TypesInProcessing.clear();
+ SPIRVType *STy = restOfCreateSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR);
+ // Create normal pointer types for the corresponding OpTypeForwardPointers.
+ for (auto &CU : ForwardPointerTypes) {
+ const Type *Ty2 = CU.first;
+ SPIRVType *STy2 = CU.second;
+ if ((Reg = DT.find(Ty2, &MIRBuilder.getMF())).isValid())
+ STy2 = getSPIRVTypeForVReg(Reg);
+ else
+ STy2 = restOfCreateSPIRVType(Ty2, MIRBuilder, AccessQual, EmitIR);
+ if (Ty == Ty2)
+ STy = STy2;
+ }
+ ForwardPointerTypes.clear();
+ return STy;
}
bool SPIRVGlobalRegistry::isScalarOfType(Register VReg,
@@ -393,8 +691,8 @@ SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth,
MIRBuilder);
}
-SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(const Type *LLVMTy,
- SPIRVType *SpirvType) {
+SPIRVType *SPIRVGlobalRegistry::finishCreatingSPIRVType(const Type *LLVMTy,
+ SPIRVType *SpirvType) {
assert(CurMF == SpirvType->getMF());
VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType;
SPIRVToLLVMType[SpirvType] = LLVMTy;
@@ -413,7 +711,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addImm(BitWidth)
.addImm(0);
- return restOfCreateSPIRVType(LLVMTy, MIB);
+ return finishCreatingSPIRVType(LLVMTy, MIB);
}
SPIRVType *
@@ -423,6 +721,19 @@ SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder) {
MIRBuilder);
}
+SPIRVType *
+SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineInstr &I,
+ const SPIRVInstrInfo &TII) {
+ Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), 1);
+ Register Reg = DT.find(LLVMTy, CurMF);
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeBool))
+ .addDef(createTypeVReg(CurMF->getRegInfo()));
+ return finishCreatingSPIRVType(LLVMTy, MIB);
+}
+
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
SPIRVType *BaseType, unsigned NumElements, MachineIRBuilder &MIRBuilder) {
return getOrCreateSPIRVType(
@@ -436,12 +747,15 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
const SPIRVInstrInfo &TII) {
Type *LLVMTy = FixedVectorType::get(
const_cast<Type *>(getTypeForSPIRVType(BaseType)), NumElements);
+ Register Reg = DT.find(LLVMTy, CurMF);
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
MachineBasicBlock &BB = *I.getParent();
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeVector))
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addUse(getSPIRVTypeID(BaseType))
.addImm(NumElements);
- return restOfCreateSPIRVType(LLVMTy, MIB);
+ return finishCreatingSPIRVType(LLVMTy, MIB);
}
SPIRVType *
@@ -460,10 +774,39 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
Type *LLVMTy =
PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
storageClassToAddressSpace(SC));
+ Register Reg = DT.find(LLVMTy, CurMF);
+ if (Reg.isValid())
+ return getSPIRVTypeForVReg(Reg);
MachineBasicBlock &BB = *I.getParent();
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypePointer))
.addDef(createTypeVReg(CurMF->getRegInfo()))
.addImm(static_cast<uint32_t>(SC))
.addUse(getSPIRVTypeID(BaseType));
- return restOfCreateSPIRVType(LLVMTy, MIB);
+ return finishCreatingSPIRVType(LLVMTy, MIB);
+}
+
+Register SPIRVGlobalRegistry::getOrCreateUndef(MachineInstr &I,
+ SPIRVType *SpvType,
+ const SPIRVInstrInfo &TII) {
+ assert(SpvType);
+ const Type *LLVMTy = getTypeForSPIRVType(SpvType);
+ assert(LLVMTy);
+ // Find a constant in DT or build a new one.
+ UndefValue *UV = UndefValue::get(const_cast<Type *>(LLVMTy));
+ Register Res = DT.find(UV, CurMF);
+ if (Res.isValid())
+ return Res;
+ LLT LLTy = LLT::scalar(32);
+ Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ assignSPIRVTypeToVReg(SpvType, Res, *CurMF);
+ DT.add(UV, CurMF, Res);
+
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpUndef))
+ .addDef(Res)
+ .addUse(getSPIRVTypeID(SpvType));
+ const auto &ST = CurMF->getSubtarget();
+ constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(),
+ *ST.getRegisterInfo(), *ST.getRegBankInfo());
+ return Res;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index 13dcc20a3e0a..59ac2712a02f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -30,7 +30,7 @@ class SPIRVGlobalRegistry {
// Do not confuse this with DuplicatesTracker as DT maps Type* to <MF, Reg>
// where Reg = OpType...
// while VRegToTypeMap tracks SPIR-V type assigned to other regs (i.e. not
- // type-declaring ones)
+ // type-declaring ones).
DenseMap<const MachineFunction *, DenseMap<Register, SPIRVType *>>
VRegToTypeMap;
@@ -38,6 +38,9 @@ class SPIRVGlobalRegistry {
DenseMap<SPIRVType *, const Type *> SPIRVToLLVMType;
+ SmallPtrSet<const Type *, 4> TypesInProcessing;
+ DenseMap<const Type *, SPIRVType *> ForwardPointerTypes;
+
// Number of bits pointers and size_t integers require.
const unsigned PointerSize;
@@ -46,6 +49,14 @@ class SPIRVGlobalRegistry {
createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
bool EmitIR = true);
+ SPIRVType *findSPIRVType(
+ const Type *Ty, MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier accessQual = SPIRV::AccessQualifier::ReadWrite,
+ bool EmitIR = true);
+ SPIRVType *restOfCreateSPIRVType(const Type *Type,
+ MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier AccessQual,
+ bool EmitIR);
public:
SPIRVGlobalRegistry(unsigned PointerSize);
@@ -91,6 +102,11 @@ public:
const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite,
bool EmitIR = true);
+ SPIRVType *assignIntTypeToVReg(unsigned BitWidth, Register VReg,
+ MachineInstr &I, const SPIRVInstrInfo &TII);
+ SPIRVType *assignVectTypeToVReg(SPIRVType *BaseType, unsigned NumElements,
+ Register VReg, MachineInstr &I,
+ const SPIRVInstrInfo &TII);
// In cases where the SPIR-V type is already known, this function can be
// used to map it to the given VReg via an ASSIGN_TYPE instruction.
@@ -123,10 +139,7 @@ public:
}
// Return the VReg holding the result of the given OpTypeXXX instruction.
- Register getSPIRVTypeID(const SPIRVType *SpirvType) const {
- assert(SpirvType && "Attempting to get type id for nullptr type.");
- return SpirvType->defs().begin()->getReg();
- }
+ Register getSPIRVTypeID(const SPIRVType *SpirvType) const;
void setCurrentFunc(MachineFunction &MF) { CurMF = &MF; }
@@ -167,19 +180,38 @@ private:
SPIRVType *getOpTypeArray(uint32_t NumElems, SPIRVType *ElemType,
MachineIRBuilder &MIRBuilder, bool EmitIR = true);
+ SPIRVType *getOpTypeOpaque(const StructType *Ty,
+ MachineIRBuilder &MIRBuilder);
+
+ SPIRVType *getOpTypeStruct(const StructType *Ty, MachineIRBuilder &MIRBuilder,
+ bool EmitIR = true);
+
SPIRVType *getOpTypePointer(SPIRV::StorageClass SC, SPIRVType *ElemType,
- MachineIRBuilder &MIRBuilder);
+ MachineIRBuilder &MIRBuilder, Register Reg);
+
+ SPIRVType *getOpTypeForwardPointer(SPIRV::StorageClass SC,
+ MachineIRBuilder &MIRBuilder);
SPIRVType *getOpTypeFunction(SPIRVType *RetType,
const SmallVectorImpl<SPIRVType *> &ArgTypes,
MachineIRBuilder &MIRBuilder);
- SPIRVType *restOfCreateSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType);
+ std::tuple<Register, ConstantInt *, bool> getOrCreateConstIntReg(
+ uint64_t Val, SPIRVType *SpvType, MachineIRBuilder *MIRBuilder,
+ MachineInstr *I = nullptr, const SPIRVInstrInfo *TII = nullptr);
+ SPIRVType *finishCreatingSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType);
public:
Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType = nullptr, bool EmitIR = true);
+ Register getOrCreateConstInt(uint64_t Val, MachineInstr &I,
+ SPIRVType *SpvType, const SPIRVInstrInfo &TII);
Register buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType = nullptr);
+ Register getOrCreateConsIntVector(uint64_t Val, MachineInstr &I,
+ SPIRVType *SpvType,
+ const SPIRVInstrInfo &TII);
+ Register getOrCreateUndef(MachineInstr &I, SPIRVType *SpvType,
+ const SPIRVInstrInfo &TII);
Register
buildGlobalVariable(Register Reg, SPIRVType *BaseType, StringRef Name,
const GlobalValue *GV, SPIRV::StorageClass Storage,
@@ -193,19 +225,24 @@ public:
SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth, MachineInstr &I,
const SPIRVInstrInfo &TII);
SPIRVType *getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder);
+ SPIRVType *getOrCreateSPIRVBoolType(MachineInstr &I,
+ const SPIRVInstrInfo &TII);
SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType,
unsigned NumElements,
MachineIRBuilder &MIRBuilder);
SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType,
unsigned NumElements, MachineInstr &I,
const SPIRVInstrInfo &TII);
-
SPIRVType *getOrCreateSPIRVPointerType(
SPIRVType *BaseType, MachineIRBuilder &MIRBuilder,
SPIRV::StorageClass SClass = SPIRV::StorageClass::Function);
SPIRVType *getOrCreateSPIRVPointerType(
SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII,
SPIRV::StorageClass SClass = SPIRV::StorageClass::Function);
+ SPIRVType *getOrCreateOpTypeFunctionWithArgs(
+ const Type *Ty, SPIRVType *RetType,
+ const SmallVectorImpl<SPIRVType *> &ArgTypes,
+ MachineIRBuilder &MIRBuilder);
};
} // end namespace llvm
#endif // LLLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
index 754906308114..66d8b17b4296 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp
@@ -52,7 +52,7 @@ bool SPIRVInstrInfo::isTypeDeclInstr(const MachineInstr &MI) const {
auto DefRegClass = MRI.getRegClassOrNull(MI.getOperand(0).getReg());
return DefRegClass && DefRegClass->getID() == SPIRV::TYPERegClass.getID();
} else {
- return false;
+ return MI.getOpcode() == SPIRV::OpTypeForwardPointer;
}
}
@@ -193,3 +193,15 @@ void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
auto &MRI = I->getMF()->getRegInfo();
MRI.replaceRegWith(DstOp.getReg(), SrcOp.getReg());
}
+
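+// Fold the GET_* pseudos away after register allocation by rewriting all uses
+// of their result to the source operand's register and erasing the pseudo.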
+bool SPIRVInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ if (MI.getOpcode() == SPIRV::GET_ID || MI.getOpcode() == SPIRV::GET_fID ||
+ MI.getOpcode() == SPIRV::GET_pID || MI.getOpcode() == SPIRV::GET_vfID ||
+ MI.getOpcode() == SPIRV::GET_vID) {
+ auto &MRI = MI.getMF()->getRegInfo();
+ MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+ }
+ return false;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
index 2600d9cfca2e..334351c8eeae 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h
@@ -48,6 +48,7 @@ public:
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
+ bool expandPostRAPseudo(MachineInstr &MI) const override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index d6fec5fd0785..d1c20795f804 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -449,6 +449,7 @@ def OpCopyLogical: UnOp<"OpCopyLogical", 400>;
def OpSNegate: UnOp<"OpSNegate", 126>;
def OpFNegate: UnOpTyped<"OpFNegate", 127, fID, fneg>;
+def OpFNegateV: UnOpTyped<"OpFNegate", 127, vfID, fneg>;
defm OpIAdd: BinOpTypedGen<"OpIAdd", 128, add, 0, 1>;
defm OpFAdd: BinOpTypedGen<"OpFAdd", 129, fadd, 1, 1>;
@@ -618,8 +619,10 @@ def OpAtomicCompareExchange: Op<230, (outs ID:$res),
(ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq,
ID:$neq, ID:$val, ID:$cmp),
"$res = OpAtomicCompareExchange $ty $ptr $sc $eq $neq $val $cmp">;
-// TODO Currently the following deprecated opcode is missing:
-// OpAtomicCompareExchangeWeak
+def OpAtomicCompareExchangeWeak: Op<231, (outs ID:$res),
+ (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq,
+ ID:$neq, ID:$val, ID:$cmp),
+ "$res = OpAtomicCompareExchangeWeak $ty $ptr $sc $eq $neq $val $cmp">;
def OpAtomicIIncrement: AtomicOp<"OpAtomicIIncrement", 232>;
def OpAtomicIDecrement: AtomicOp<"OpAtomicIDecrement", 233>;
@@ -660,6 +663,11 @@ def OpMemoryNamedBarrier: Op<329, (outs), (ins ID:$barr, ID:$mem, ID:$sem),
// 3.42.21. Group and Subgroup Instructions
+def OpGroupAsyncCopy: Op<259, (outs ID:$res), (ins TYPE:$ty, ID:$scope,
+ ID:$dst, ID:$src, ID:$nelts, ID:$stride, ID:$event),
+ "$res = OpGroupAsyncCopy $ty $scope $dst $src $nelts $stride $event">;
+def OpGroupWaitEvents: Op<260, (outs), (ins ID:$scope, ID:$nelts, ID:$elist),
+ "OpGroupWaitEvents $scope $nelts $elist">;
def OpGroupAll: Op<261, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr),
"$res = OpGroupAll $ty $scope $pr">;
def OpGroupAny: Op<262, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr),
@@ -680,6 +688,18 @@ def OpGroupUMax: OpGroup<"UMax", 270>;
def OpGroupSMax: OpGroup<"SMax", 271>;
// TODO: 3.42.22. Device-Side Enqueue Instructions
+def OpRetainEvent: Op<297, (outs), (ins ID:$event), "OpRetainEvent $event">;
+def OpReleaseEvent: Op<298, (outs), (ins ID:$event), "OpReleaseEvent $event">;
+def OpCreateUserEvent: Op<299, (outs ID:$res), (ins TYPE:$type),
+ "$res = OpCreateUserEvent $type">;
+def OpIsValidEvent: Op<300, (outs ID:$res), (ins TYPE:$type, ID:$event),
+ "$res = OpIsValidEvent $type $event ">;
+def OpSetUserEventStatus: Op<301, (outs), (ins ID:$event, ID:$status),
+ "OpSetUserEventStatus $event $status">;
+def OpCaptureEventProfilingInfo: Op<302, (outs),
+ (ins ID:$event, ID:$info, ID:$value),
+ "OpCaptureEventProfilingInfo $event $info $value">;
+
// TODO: 3.42.23. Pipe Instructions
// 3.42.24. Non-Uniform Instructions
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 90b921a06f21..9365fd22e4e7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -197,6 +197,8 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
}
+static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI);
+
// Defined in SPIRVLegalizerInfo.cpp.
extern bool isTypeFoldingSupported(unsigned Opcode);
@@ -335,6 +337,30 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast);
case TargetOpcode::G_ADDRSPACE_CAST:
return selectAddrSpaceCast(ResVReg, ResType, I);
+ case TargetOpcode::G_PTR_ADD: {
+ // Currently, we get G_PTR_ADD only as a result of translating
+ // global variables, initialized with constant expressions like GV + Const
+ // (see test opencl/basic/progvar_prog_scope_init.ll).
+ // TODO: extend the handler once we have other cases.
+ assert(I.getOperand(1).isReg() && I.getOperand(2).isReg());
+ Register GV = I.getOperand(1).getReg();
+ MachineRegisterInfo::def_instr_iterator II = MRI->def_instr_begin(GV);
+ assert(((*II).getOpcode() == TargetOpcode::G_GLOBAL_VALUE ||
+ (*II).getOpcode() == TargetOpcode::COPY ||
+ (*II).getOpcode() == SPIRV::OpVariable) &&
+ isImm(I.getOperand(2), MRI));
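+ // Fold GV + Const into an OpSpecConstantOp whose operation is
+ // InBoundsPtrAccessChain, passing the global, the zero index built below and
+ // the original constant operand.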
+ Register Idx = buildZerosVal(GR.getOrCreateSPIRVIntegerType(32, I, TII), I);
+ MachineBasicBlock &BB = *I.getParent();
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSpecConstantOp))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(
+ SPIRV::Opcode::InBoundsPtrAccessChain))
+ .addUse(GV)
+ .addUse(Idx)
+ .addUse(I.getOperand(2).getReg());
+ return MIB.constrainAllUses(TII, TRI, RBI);
+ }
case TargetOpcode::G_ATOMICRMW_OR:
return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicOr);
@@ -387,23 +413,6 @@ bool SPIRVInstructionSelector::selectUnOp(Register ResVReg,
Opcode);
}
-static SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
- switch (Ord) {
- case AtomicOrdering::Acquire:
- return SPIRV::MemorySemantics::Acquire;
- case AtomicOrdering::Release:
- return SPIRV::MemorySemantics::Release;
- case AtomicOrdering::AcquireRelease:
- return SPIRV::MemorySemantics::AcquireRelease;
- case AtomicOrdering::SequentiallyConsistent:
- return SPIRV::MemorySemantics::SequentiallyConsistent;
- case AtomicOrdering::Unordered:
- case AtomicOrdering::Monotonic:
- case AtomicOrdering::NotAtomic:
- return SPIRV::MemorySemantics::None;
- }
-}
-
static SPIRV::Scope getScope(SyncScope::ID Ord) {
switch (Ord) {
case SyncScope::SingleThread:
@@ -484,16 +493,15 @@ bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg,
MachineInstr &I) const {
MachineBasicBlock &BB = *I.getParent();
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemorySized))
- .addDef(I.getOperand(0).getReg())
+ .addUse(I.getOperand(0).getReg())
.addUse(I.getOperand(1).getReg())
.addUse(I.getOperand(2).getReg());
if (I.getNumMemOperands())
addMemoryOperands(*I.memoperands_begin(), MIB);
bool Result = MIB.constrainAllUses(TII, TRI, RBI);
- if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) {
+ if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg())
BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), ResVReg)
.addUse(MIB->getOperand(0).getReg());
- }
return Result;
}
@@ -541,36 +549,71 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const {
bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
- assert(I.hasOneMemOperand());
- const MachineMemOperand *MemOp = *I.memoperands_begin();
- uint32_t Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
- Register ScopeReg = buildI32Constant(Scope, I);
-
+ Register ScopeReg;
+ Register MemSemEqReg;
+ Register MemSemNeqReg;
Register Ptr = I.getOperand(2).getReg();
+ if (I.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) {
+ assert(I.hasOneMemOperand());
+ const MachineMemOperand *MemOp = *I.memoperands_begin();
+ unsigned Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
+ ScopeReg = buildI32Constant(Scope, I);
+
+ unsigned ScSem = static_cast<uint32_t>(
+ getMemSemanticsForStorageClass(GR.getPointerStorageClass(Ptr)));
+ AtomicOrdering AO = MemOp->getSuccessOrdering();
+ unsigned MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem;
+ MemSemEqReg = buildI32Constant(MemSemEq, I);
+ AtomicOrdering FO = MemOp->getFailureOrdering();
+ unsigned MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem;
+ MemSemNeqReg =
+ MemSemEq == MemSemNeq ? MemSemEqReg : buildI32Constant(MemSemNeq, I);
+ } else {
+ ScopeReg = I.getOperand(5).getReg();
+ MemSemEqReg = I.getOperand(6).getReg();
+ MemSemNeqReg = I.getOperand(7).getReg();
+ }
+
Register Cmp = I.getOperand(3).getReg();
Register Val = I.getOperand(4).getReg();
-
SPIRVType *SpvValTy = GR.getSPIRVTypeForVReg(Val);
- SPIRV::StorageClass SC = GR.getPointerStorageClass(Ptr);
- uint32_t ScSem = static_cast<uint32_t>(getMemSemanticsForStorageClass(SC));
- AtomicOrdering AO = MemOp->getSuccessOrdering();
- uint32_t MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem;
- Register MemSemEqReg = buildI32Constant(MemSemEq, I);
- AtomicOrdering FO = MemOp->getFailureOrdering();
- uint32_t MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem;
- Register MemSemNeqReg =
- MemSemEq == MemSemNeq ? MemSemEqReg : buildI32Constant(MemSemNeq, I);
+ Register ACmpRes = MRI->createVirtualRegister(&SPIRV::IDRegClass);
const DebugLoc &DL = I.getDebugLoc();
- return BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(SpvValTy))
- .addUse(Ptr)
- .addUse(ScopeReg)
- .addUse(MemSemEqReg)
- .addUse(MemSemNeqReg)
- .addUse(Val)
- .addUse(Cmp)
- .constrainAllUses(TII, TRI, RBI);
+ bool Result =
+ BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange))
+ .addDef(ACmpRes)
+ .addUse(GR.getSPIRVTypeID(SpvValTy))
+ .addUse(Ptr)
+ .addUse(ScopeReg)
+ .addUse(MemSemEqReg)
+ .addUse(MemSemNeqReg)
+ .addUse(Val)
+ .addUse(Cmp)
+ .constrainAllUses(TII, TRI, RBI);
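+ // cmpxchg yields a {value, success} aggregate: derive the success flag by
+ // comparing the returned value with the expected one, then assemble the
+ // result struct with two OpCompositeInserts.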
+ Register CmpSuccReg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ SPIRVType *BoolTy = GR.getOrCreateSPIRVBoolType(I, TII);
+ Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpIEqual))
+ .addDef(CmpSuccReg)
+ .addUse(GR.getSPIRVTypeID(BoolTy))
+ .addUse(ACmpRes)
+ .addUse(Cmp)
+ .constrainAllUses(TII, TRI, RBI);
+ Register TmpReg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpCompositeInsert))
+ .addDef(TmpReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(ACmpRes)
+ .addUse(GR.getOrCreateUndef(I, ResType, TII))
+ .addImm(0)
+ .constrainAllUses(TII, TRI, RBI);
+ Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpCompositeInsert))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(CmpSuccReg)
+ .addUse(TmpReg)
+ .addImm(1)
+ .constrainAllUses(TII, TRI, RBI);
+ return Result;
}
static bool isGenericCastablePtr(SPIRV::StorageClass SC) {
@@ -592,6 +635,27 @@ static bool isGenericCastablePtr(SPIRV::StorageClass SC) {
bool SPIRVInstructionSelector::selectAddrSpaceCast(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
+ // If the AddrSpaceCast has a single user and that user is an
+ // OpConstantComposite, an OpVariable or an spv_init_global intrinsic,
+ // select OpSpecConstantOp instead.
+ auto UIs = MRI->use_instructions(ResVReg);
+ if (!UIs.empty() && ++UIs.begin() == UIs.end() &&
+ (UIs.begin()->getOpcode() == SPIRV::OpConstantComposite ||
+ UIs.begin()->getOpcode() == SPIRV::OpVariable ||
+ isSpvIntrinsic(*UIs.begin(), Intrinsic::spv_init_global))) {
+ Register NewReg = I.getOperand(1).getReg();
+ MachineBasicBlock &BB = *I.getParent();
+ SPIRVType *SpvBaseTy = GR.getOrCreateSPIRVIntegerType(8, I, TII);
+ ResType = GR.getOrCreateSPIRVPointerType(SpvBaseTy, I, TII,
+ SPIRV::StorageClass::Generic);
+ bool Result =
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSpecConstantOp))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addImm(static_cast<uint32_t>(SPIRV::Opcode::PtrCastToGeneric))
+ .addUse(NewReg)
+ .constrainAllUses(TII, TRI, RBI);
+ return Result;
+ }
Register SrcPtr = I.getOperand(1).getReg();
SPIRVType *SrcPtrTy = GR.getSPIRVTypeForVReg(SrcPtr);
SPIRV::StorageClass SrcSC = GR.getPointerStorageClass(SrcPtr);
@@ -842,7 +906,9 @@ bool SPIRVInstructionSelector::selectFCmp(Register ResVReg,
Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType,
MachineInstr &I) const {
- return buildI32Constant(0, I, ResType);
+ if (ResType->getOpcode() == SPIRV::OpTypeVector)
+ return GR.getOrCreateConsIntVector(0, I, ResType, TII);
+ return GR.getOrCreateConstInt(0, I, ResType, TII);
}
Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes,
@@ -851,20 +917,9 @@ Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes,
unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType);
APInt One = AllOnes ? APInt::getAllOnesValue(BitWidth)
: APInt::getOneBitSet(BitWidth, 0);
- Register OneReg = buildI32Constant(One.getZExtValue(), I, ResType);
- if (ResType->getOpcode() == SPIRV::OpTypeVector) {
- const unsigned NumEles = ResType->getOperand(2).getImm();
- Register OneVec = MRI->createVirtualRegister(&SPIRV::IDRegClass);
- unsigned Opcode = SPIRV::OpConstantComposite;
- auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode))
- .addDef(OneVec)
- .addUse(GR.getSPIRVTypeID(ResType));
- for (unsigned i = 0; i < NumEles; ++i)
- MIB.addUse(OneReg);
- constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
- return OneVec;
- }
- return OneReg;
+ if (ResType->getOpcode() == SPIRV::OpTypeVector)
+ return GR.getOrCreateConsIntVector(One.getZExtValue(), I, ResType, TII);
+ return GR.getOrCreateConstInt(One.getZExtValue(), I, ResType, TII);
}
bool SPIRVInstructionSelector::selectSelect(Register ResVReg,
@@ -959,13 +1014,23 @@ bool SPIRVInstructionSelector::selectConst(Register ResVReg,
const SPIRVType *ResType,
const APInt &Imm,
MachineInstr &I) const {
- assert(ResType->getOpcode() != SPIRV::OpTypePointer || Imm.isNullValue());
+ unsigned TyOpcode = ResType->getOpcode();
+ assert(TyOpcode != SPIRV::OpTypePointer || Imm.isNullValue());
MachineBasicBlock &BB = *I.getParent();
- if (ResType->getOpcode() == SPIRV::OpTypePointer && Imm.isNullValue()) {
+ if ((TyOpcode == SPIRV::OpTypePointer || TyOpcode == SPIRV::OpTypeEvent) &&
+ Imm.isNullValue())
return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.constrainAllUses(TII, TRI, RBI);
+ if (TyOpcode == SPIRV::OpTypeInt) {
+ Register Reg = GR.getOrCreateConstInt(Imm.getZExtValue(), I, ResType, TII);
+ if (Reg == ResVReg)
+ return true;
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
+ .addDef(ResVReg)
+ .addUse(Reg)
+ .constrainAllUses(TII, TRI, RBI);
}
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI))
.addDef(ResVReg)
@@ -1006,29 +1071,29 @@ bool SPIRVInstructionSelector::selectInsertVal(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
MachineBasicBlock &BB = *I.getParent();
- return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- // object to insert
- .addUse(I.getOperand(3).getReg())
- // composite to insert into
- .addUse(I.getOperand(2).getReg())
- // TODO: support arbitrary number of indices
- .addImm(foldImm(I.getOperand(4), MRI))
- .constrainAllUses(TII, TRI, RBI);
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ // object to insert
+ .addUse(I.getOperand(3).getReg())
+ // composite to insert into
+ .addUse(I.getOperand(2).getReg());
+ for (unsigned i = 4; i < I.getNumOperands(); i++)
+ MIB.addImm(foldImm(I.getOperand(i), MRI));
+ return MIB.constrainAllUses(TII, TRI, RBI);
}
bool SPIRVInstructionSelector::selectExtractVal(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
MachineBasicBlock &BB = *I.getParent();
- return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(I.getOperand(2).getReg())
- // TODO: support arbitrary number of indices
- .addImm(foldImm(I.getOperand(3), MRI))
- .constrainAllUses(TII, TRI, RBI);
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(2).getReg());
+ for (unsigned i = 3; i < I.getNumOperands(); i++)
+ MIB.addImm(foldImm(I.getOperand(i), MRI));
+ return MIB.constrainAllUses(TII, TRI, RBI);
}
bool SPIRVInstructionSelector::selectInsertElt(Register ResVReg,
@@ -1154,6 +1219,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
}
return MIB.constrainAllUses(TII, TRI, RBI);
} break;
+ case Intrinsic::spv_cmpxchg:
+ return selectAtomicCmpXchg(ResVReg, ResType, I);
+ break;
default:
llvm_unreachable("Intrinsic selection not implemented");
}
@@ -1239,8 +1307,32 @@ bool SPIRVInstructionSelector::selectGlobalValue(
GV->getType(), MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false);
std::string GlobalIdent = GV->getGlobalIdentifier();
- // TODO: suport @llvm.global.annotations.
+ // Functions can appear as operands in tests that use blocks, e.g. in
+ // transcoding/global_block.ll. These operands are not used and should be
+ // replaced with zero constants. Their type is always expected to be
+ // OpTypePointer Function %uchar.
+ if (isa<Function>(GV)) {
+ const Constant *ConstVal = GV;
+ MachineBasicBlock &BB = *I.getParent();
+ Register NewReg = GR.find(ConstVal, GR.CurMF);
+ if (!NewReg.isValid()) {
+ SPIRVType *SpvBaseTy = GR.getOrCreateSPIRVIntegerType(8, I, TII);
+ ResType = GR.getOrCreateSPIRVPointerType(SpvBaseTy, I, TII);
+ Register NewReg = ResVReg;
+ GR.add(ConstVal, GR.CurMF, NewReg);
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
+ .addDef(NewReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .constrainAllUses(TII, TRI, RBI);
+ }
+ assert(NewReg != ResVReg);
+ return BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
+ .addDef(ResVReg)
+ .addUse(NewReg)
+ .constrainAllUses(TII, TRI, RBI);
+ }
auto GlobalVar = cast<GlobalVariable>(GV);
+ assert(GlobalVar->getName() != "llvm.global.annotations");
bool HasInit = GlobalVar->hasInitializer() &&
!isa<UndefValue>(GlobalVar->getInitializer());
diff --git a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
index 8e4ab973bf07..8aaac50c94d7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp
@@ -45,7 +45,12 @@ void SPIRVMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI,
break;
}
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
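+ // Operand 2 of OpExtInst selects the extended instruction set; lower it to
+ // the register reserved for that set in ExtInstSetMap rather than to an
+ // immediate.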
+ if (MI->getOpcode() == SPIRV::OpExtInst && i == 2) {
+ Register Reg = MAI->getExtInstSetReg(MO.getImm());
+ MCOp = MCOperand::createReg(Reg);
+ } else {
+ MCOp = MCOperand::createImm(MO.getImm());
+ }
break;
case MachineOperand::MO_FPImmediate:
MCOp = MCOperand::createDFPImm(
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index a39df5234935..143ddf7297dc 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -60,62 +60,50 @@ void SPIRVModuleAnalysis::setBaseInfo(const Module &M) {
MAI.InstrsToDelete.clear();
MAI.FuncNameMap.clear();
MAI.GlobalVarList.clear();
+ MAI.ExtInstSetMap.clear();
// TODO: determine memory model and source language from the configuration.
- MAI.Mem = SPIRV::MemoryModel::OpenCL;
- MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C;
- unsigned PtrSize = ST->getPointerSize();
- MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32
- : PtrSize == 64 ? SPIRV::AddressingModel::Physical64
- : SPIRV::AddressingModel::Logical;
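+ // Take the addressing and memory model from the "spirv.MemoryModel" named
+ // metadata when present; otherwise default to the OpenCL memory model with
+ // an addressing model derived from the target pointer size.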
+ if (auto MemModel = M.getNamedMetadata("spirv.MemoryModel")) {
+ auto MemMD = MemModel->getOperand(0);
+ MAI.Addr = static_cast<SPIRV::AddressingModel>(getMetadataUInt(MemMD, 0));
+ MAI.Mem = static_cast<SPIRV::MemoryModel>(getMetadataUInt(MemMD, 1));
+ } else {
+ MAI.Mem = SPIRV::MemoryModel::OpenCL;
+ unsigned PtrSize = ST->getPointerSize();
+ MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32
+ : PtrSize == 64 ? SPIRV::AddressingModel::Physical64
+ : SPIRV::AddressingModel::Logical;
+ }
// Get the OpenCL version number from metadata.
// TODO: support other source languages.
- MAI.SrcLangVersion = 0;
if (auto VerNode = M.getNamedMetadata("opencl.ocl.version")) {
- // Construct version literal according to OpenCL 2.2 environment spec.
+ MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C;
+ // Construct version literal in accordance with SPIRV-LLVM-Translator.
+ // TODO: support multiple OCL version metadata.
+ assert(VerNode->getNumOperands() > 0 && "Invalid SPIR");
auto VersionMD = VerNode->getOperand(0);
unsigned MajorNum = getMetadataUInt(VersionMD, 0, 2);
unsigned MinorNum = getMetadataUInt(VersionMD, 1);
unsigned RevNum = getMetadataUInt(VersionMD, 2);
- MAI.SrcLangVersion = 0 | (MajorNum << 16) | (MinorNum << 8) | RevNum;
+ MAI.SrcLangVersion = (MajorNum * 100 + MinorNum) * 1000 + RevNum;
+ } else {
+ MAI.SrcLang = SPIRV::SourceLanguage::Unknown;
+ MAI.SrcLangVersion = 0;
}
-}
-// True if there is an instruction in the MS list with all the same operands as
-// the given instruction has (after the given starting index).
-// TODO: maybe it needs to check Opcodes too.
-static bool findSameInstrInMS(const MachineInstr &A,
- SPIRV::ModuleSectionType MSType,
- SPIRV::ModuleAnalysisInfo &MAI,
- bool UpdateRegAliases,
- unsigned StartOpIndex = 0) {
- for (const auto *B : MAI.MS[MSType]) {
- const unsigned NumAOps = A.getNumOperands();
- if (NumAOps == B->getNumOperands() && A.getNumDefs() == B->getNumDefs()) {
- bool AllOpsMatch = true;
- for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) {
- if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) {
- Register RegA = A.getOperand(i).getReg();
- Register RegB = B->getOperand(i).getReg();
- AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) ==
- MAI.getRegisterAlias(B->getMF(), RegB);
- } else {
- AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i));
- }
- }
- if (AllOpsMatch) {
- if (UpdateRegAliases) {
- assert(A.getOperand(0).isReg() && B->getOperand(0).isReg());
- Register LocalReg = A.getOperand(0).getReg();
- Register GlobalReg =
- MAI.getRegisterAlias(B->getMF(), B->getOperand(0).getReg());
- MAI.setRegisterAlias(A.getMF(), LocalReg, GlobalReg);
- }
- return true;
- }
+ if (auto ExtNode = M.getNamedMetadata("opencl.used.extensions")) {
+ for (unsigned I = 0, E = ExtNode->getNumOperands(); I != E; ++I) {
+ MDNode *MD = ExtNode->getOperand(I);
+ if (!MD || MD->getNumOperands() == 0)
+ continue;
+ for (unsigned J = 0, N = MD->getNumOperands(); J != N; ++J)
+ MAI.SrcExt.insert(cast<MDString>(MD->getOperand(J))->getString());
}
}
- return false;
+
+ // TODO: check if it's required by default.
+ MAI.ExtInstSetMap[static_cast<unsigned>(SPIRV::InstructionSet::OpenCL_std)] =
+ Register::index2VirtReg(MAI.getNextID());
}
// Collect MI which defines the register in the given machine function.
@@ -135,7 +123,7 @@ void SPIRVModuleAnalysis::collectGlobalEntities(
const std::vector<SPIRV::DTSortableEntry *> &DepsGraph,
SPIRV::ModuleSectionType MSType,
std::function<bool(const SPIRV::DTSortableEntry *)> Pred,
- bool UsePreOrder) {
+ bool UsePreOrder = false) {
DenseSet<const SPIRV::DTSortableEntry *> Visited;
for (const auto *E : DepsGraph) {
std::function<void(const SPIRV::DTSortableEntry *)> RecHoistUtil;
@@ -188,13 +176,41 @@ void SPIRVModuleAnalysis::processDefInstrs(const Module &M) {
collectGlobalEntities(
DepsGraph, SPIRV::MB_TypeConstVars,
- [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); }, false);
+ [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); });
collectGlobalEntities(
DepsGraph, SPIRV::MB_ExtFuncDecls,
[](const SPIRV::DTSortableEntry *E) { return E->getIsFunc(); }, true);
}
+// True if there is an instruction in the MS list with all the same operands as
+// the given instruction has (after the given starting index).
+// TODO: maybe it needs to check Opcodes too.
+static bool findSameInstrInMS(const MachineInstr &A,
+ SPIRV::ModuleSectionType MSType,
+ SPIRV::ModuleAnalysisInfo &MAI,
+ unsigned StartOpIndex = 0) {
+ for (const auto *B : MAI.MS[MSType]) {
+ const unsigned NumAOps = A.getNumOperands();
+ if (NumAOps != B->getNumOperands() || A.getNumDefs() != B->getNumDefs())
+ continue;
+ bool AllOpsMatch = true;
+ for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) {
+ if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) {
+ Register RegA = A.getOperand(i).getReg();
+ Register RegB = B->getOperand(i).getReg();
+ AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) ==
+ MAI.getRegisterAlias(B->getMF(), RegB);
+ } else {
+ AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i));
+ }
+ }
+ if (AllOpsMatch)
+ return true;
+ }
+ return false;
+}
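As an aside, the duplicate test above can be pictured with plain containers; a rough sketch where Op and the alias map are invented stand-ins for MachineOperand and the register alias table:

#include <map>
#include <vector>

struct Op {
  bool IsReg;
  int Value; // register id when IsReg is set, otherwise an immediate
};

// Two operand lists match when, from StartIdx on, register operands compare
// equal after being rewritten through the alias table and all other operands
// compare equal directly.
static bool sameUnderAliases(const std::vector<Op> &A, const std::vector<Op> &B,
                             const std::map<int, int> &Alias,
                             unsigned StartIdx) {
  if (A.size() != B.size())
    return false;
  for (unsigned I = StartIdx; I < A.size(); ++I) {
    if (A[I].IsReg != B[I].IsReg)
      return false;
    int VA = A[I].Value, VB = B[I].Value;
    if (A[I].IsReg) {
      VA = Alias.count(VA) ? Alias.at(VA) : VA;
      VB = Alias.count(VB) ? Alias.at(VB) : VB;
    }
    if (VA != VB)
      return false;
  }
  return true;
}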
+
// Look for IDs declared with Import linkage, and map the imported name string
// to the register defining that variable (which will usually be the result of
// an OpFunction). This lets us call externally imported functions using
@@ -228,12 +244,16 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI,
// numbering has already occurred by this point. We can directly compare reg
// arguments when detecting duplicates.
static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI,
- SPIRV::ModuleSectionType MSType) {
+ SPIRV::ModuleSectionType MSType,
+ bool Append = true) {
MAI.setSkipEmission(&MI);
- if (findSameInstrInMS(MI, MSType, MAI, false))
+ if (findSameInstrInMS(MI, MSType, MAI))
return; // Found a duplicate, so don't add it.
// No duplicates, so add it.
- MAI.MS[MSType].push_back(&MI);
+ if (Append)
+ MAI.MS[MSType].push_back(&MI);
+ else
+ MAI.MS[MSType].insert(MAI.MS[MSType].begin(), &MI);
}
// Some global instructions make reference to function-local ID regs, so cannot
@@ -256,15 +276,22 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) {
} else if (TII->isDecorationInstr(MI)) {
collectOtherInstr(MI, MAI, SPIRV::MB_Annotations);
collectFuncNames(MI, *F);
+ } else if (TII->isConstantInstr(MI)) {
+ // OpSpecConstant*s are not in DT yet,
+ // but they still need to be collected.
+ collectOtherInstr(MI, MAI, SPIRV::MB_TypeConstVars);
} else if (OpCode == SPIRV::OpFunction) {
collectFuncNames(MI, *F);
+ } else if (OpCode == SPIRV::OpTypeForwardPointer) {
+ collectOtherInstr(MI, MAI, SPIRV::MB_TypeConstVars, false);
}
}
}
}
// Number registers in all functions globally from 0 onwards and store
-// the result in global register alias table.
+// the result in the global register alias table. Some registers are already
+// numbered in collectGlobalEntities.
void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) {
for (auto F = M.begin(), E = M.end(); F != E; ++F) {
if ((*F).isDeclaration())
@@ -282,11 +309,50 @@ void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) {
Register NewReg = Register::index2VirtReg(MAI.getNextID());
MAI.setRegisterAlias(MF, Reg, NewReg);
}
+ if (MI.getOpcode() != SPIRV::OpExtInst)
+ continue;
+ auto Set = MI.getOperand(2).getImm();
+ if (MAI.ExtInstSetMap.find(Set) == MAI.ExtInstSetMap.end())
+ MAI.ExtInstSetMap[Set] = Register::index2VirtReg(MAI.getNextID());
}
}
}
}
+// Find OpIEqual and OpBranchConditional instructions originating from
+// OpSwitches and mark them as skipped for emission. Also mark an MBB as
+// skipped if it contains only such instructions.
+static void processSwitches(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
+ MachineModuleInfo *MMI) {
+ DenseSet<Register> SwitchRegs;
+ for (auto F = M.begin(), E = M.end(); F != E; ++F) {
+ MachineFunction *MF = MMI->getMachineFunction(*F);
+ if (!MF)
+ continue;
+ for (MachineBasicBlock &MBB : *MF)
+ for (MachineInstr &MI : MBB) {
+ if (MAI.getSkipEmission(&MI))
+ continue;
+ if (MI.getOpcode() == SPIRV::OpSwitch) {
+ assert(MI.getOperand(0).isReg());
+ SwitchRegs.insert(MI.getOperand(0).getReg());
+ }
+ if (MI.getOpcode() != SPIRV::OpIEqual || !MI.getOperand(2).isReg() ||
+ !SwitchRegs.contains(MI.getOperand(2).getReg()))
+ continue;
+ Register CmpReg = MI.getOperand(0).getReg();
+ MachineInstr *CBr = MI.getNextNode();
+ assert(CBr && CBr->getOpcode() == SPIRV::OpBranchConditional &&
+ CBr->getOperand(0).isReg() &&
+ CBr->getOperand(0).getReg() == CmpReg);
+ MAI.setSkipEmission(&MI);
+ MAI.setSkipEmission(CBr);
+ if (&MBB.front() == &MI && &MBB.back() == CBr)
+ MAI.MBBsToSkip.insert(&MBB);
+ }
+ }
+}
+
struct SPIRV::ModuleAnalysisInfo SPIRVModuleAnalysis::MAI;
void SPIRVModuleAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -305,7 +371,9 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) {
setBaseInfo(M);
- // TODO: Process type/const/global var/func decl instructions, number their
+ processSwitches(M, MAI, MMI);
+
+ // Process type/const/global var/func decl instructions, number their
// destination registers from 0 to N, collect Extensions and Capabilities.
processDefInstrs(M);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index 585868909d28..9bcdf6e9ae2a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -52,6 +52,9 @@ struct ModuleAnalysisInfo {
SPIRV::AddressingModel Addr;
SPIRV::SourceLanguage SrcLang;
unsigned SrcLangVersion;
+ StringSet<> SrcExt;
+ // Maps ExtInstSet to corresponding ID register.
+ DenseMap<unsigned, Register> ExtInstSetMap;
// Contains the list of all global OpVariables in the module.
SmallVector<MachineInstr *, 4> GlobalVarList;
// Maps function names to corresponding function ID registers.
@@ -59,6 +62,9 @@ struct ModuleAnalysisInfo {
// The set contains machine instructions which are necessary
// for correct MIR but will not be emitted in function bodies.
DenseSet<MachineInstr *> InstrsToDelete;
+ // The set contains machine basic blocks which are necessary
+ // for correct MIR but will not be emitted.
+ DenseSet<MachineBasicBlock *> MBBsToSkip;
// The table contains global aliases of local registers for each machine
// function. The aliases are used to substitute local registers during
// code emission.
@@ -75,6 +81,7 @@ struct ModuleAnalysisInfo {
assert(FuncReg != FuncNameMap.end() && "Cannot find function Id");
return FuncReg->second;
}
+ Register getExtInstSetReg(unsigned SetNum) { return ExtInstSetMap[SetNum]; }
InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; }
void setSkipEmission(MachineInstr *MI) { InstrsToDelete.insert(MI); }
bool getSkipEmission(const MachineInstr *MI) {
@@ -123,7 +130,6 @@ public:
private:
void setBaseInfo(const Module &M);
- template <typename T> void collectTypesConstsVars();
void collectGlobalEntities(
const std::vector<SPIRV::DTSortableEntry *> &DepsGraph,
SPIRV::ModuleSectionType MSType,
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index 687f84046650..e620226dcc7a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -39,11 +39,58 @@ public:
};
} // namespace
-static bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) {
- if (MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
- MI.getIntrinsicID() == IntrinsicID)
- return true;
- return false;
+static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DenseMap<MachineInstr *, Register> RegsAlreadyAddedToDT;
+ SmallVector<MachineInstr *, 10> ToErase, ToEraseComposites;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!isSpvIntrinsic(MI, Intrinsic::spv_track_constant))
+ continue;
+ ToErase.push_back(&MI);
+ auto *Const =
+ cast<Constant>(cast<ConstantAsMetadata>(
+ MI.getOperand(3).getMetadata()->getOperand(0))
+ ->getValue());
+ if (auto *GV = dyn_cast<GlobalValue>(Const)) {
+ Register Reg = GR->find(GV, &MF);
+ if (!Reg.isValid())
+ GR->add(GV, &MF, MI.getOperand(2).getReg());
+ else
+ RegsAlreadyAddedToDT[&MI] = Reg;
+ } else {
+ Register Reg = GR->find(Const, &MF);
+ if (!Reg.isValid()) {
+ if (auto *ConstVec = dyn_cast<ConstantDataVector>(Const)) {
+ auto *BuildVec = MRI.getVRegDef(MI.getOperand(2).getReg());
+ assert(BuildVec &&
+ BuildVec->getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ for (unsigned i = 0; i < ConstVec->getNumElements(); ++i)
+ GR->add(ConstVec->getElementAsConstant(i), &MF,
+ BuildVec->getOperand(1 + i).getReg());
+ }
+ GR->add(Const, &MF, MI.getOperand(2).getReg());
+ } else {
+ RegsAlreadyAddedToDT[&MI] = Reg;
+ // This MI is unused and will be removed. If its source comes from a
+ // spv_const_composite intrinsic, that is unused too and is also removed.
+ assert(MI.getOperand(2).isReg() && "Reg operand is expected");
+ MachineInstr *SrcMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+ if (SrcMI && isSpvIntrinsic(*SrcMI, Intrinsic::spv_const_composite))
+ ToEraseComposites.push_back(SrcMI);
+ }
+ }
+ }
+ }
+ for (MachineInstr *MI : ToErase) {
+ Register Reg = MI->getOperand(2).getReg();
+ if (RegsAlreadyAddedToDT.find(MI) != RegsAlreadyAddedToDT.end())
+ Reg = RegsAlreadyAddedToDT[MI];
+ MRI.replaceRegWith(MI->getOperand(0).getReg(), Reg);
+ MI->eraseFromParent();
+ }
+ for (MachineInstr *MI : ToEraseComposites)
+ MI->eraseFromParent();
}
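The bookkeeping above reduces to a find-or-record pattern; a simplified sketch with a plain std::map standing in for SPIRVGlobalRegistry (the names here are invented for illustration):

#include <map>

using Register = unsigned;

struct ConstantTracker {
  std::map<const void *, Register> Regs; // stand-in for the global registry

  // Returns the register every user of Const should end up with: the first
  // register recorded for it, or DefReg if Const has not been seen yet.
  Register track(const void *Const, Register DefReg) {
    auto It = Regs.find(Const);
    if (It == Regs.end()) {
      Regs.emplace(Const, DefReg); // first definition wins
      return DefReg;
    }
    return It->second; // duplicate: callers rewrite their uses to this one
  }
};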
static void foldConstantsIntoIntrinsics(MachineFunction &MF) {
@@ -120,6 +167,7 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR,
}
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_ADDRSPACE_CAST:
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::COPY: {
MachineOperand &Op = MI->getOperand(1);
MachineInstr *Def = Op.isReg() ? MRI.getVRegDef(Op.getReg()) : nullptr;
@@ -308,6 +356,22 @@ static void processInstrsWithTypeFolding(MachineFunction &MF,
processInstr(MI, MIB, MRI, GR);
}
}
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ // Rewrite dst types for ASSIGN_TYPE instrs so that tblgen'erated selection
+ // can handle them; this cannot be done in the Legalizer, which operates on
+ // gMIR only.
+ if (MI.getOpcode() != SPIRV::ASSIGN_TYPE)
+ continue;
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!isTypeFoldingSupported(MRI.getVRegDef(SrcReg)->getOpcode()))
+ continue;
+ Register DstReg = MI.getOperand(0).getReg();
+ if (MRI.getType(DstReg).isVector())
+ MRI.setRegClass(DstReg, &SPIRV::IDRegClass);
+ MRI.setType(DstReg, LLT::scalar(32));
+ }
+ }
}
static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
@@ -421,6 +485,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) {
SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry();
GR->setCurrentFunc(MF);
MachineIRBuilder MIB(MF);
+ addConstantsToTrack(MF, GR);
foldConstantsIntoIntrinsics(MF);
insertBitcasts(MF, GR, MIB);
generateAssignInstrs(MF, GR, MIB);
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
new file mode 100644
index 000000000000..13c3c12c1b41
--- /dev/null
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -0,0 +1,288 @@
+//===-- SPIRVPrepareFunctions.cpp - modify function signatures --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass modifies function signatures that contain aggregate arguments
+// and/or return values. It also replaces some LLVM intrinsic calls with
+// calls to functions it generates, mirroring what the translator does.
+//
+// NOTE: this is a module pass because it needs to modify
+// GVs/functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRV.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeSPIRVPrepareFunctionsPass(PassRegistry &);
+}
+
+namespace {
+
+class SPIRVPrepareFunctions : public ModulePass {
+ Function *processFunctionSignature(Function *F);
+
+public:
+ static char ID;
+ SPIRVPrepareFunctions() : ModulePass(ID) {
+ initializeSPIRVPrepareFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ StringRef getPassName() const override { return "SPIRV prepare functions"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ ModulePass::getAnalysisUsage(AU);
+ }
+};
+
+} // namespace
+
+char SPIRVPrepareFunctions::ID = 0;
+
+INITIALIZE_PASS(SPIRVPrepareFunctions, "prepare-functions",
+ "SPIRV prepare functions", false, false)
+
+Function *SPIRVPrepareFunctions::processFunctionSignature(Function *F) {
+ IRBuilder<> B(F->getContext());
+
+ bool IsRetAggr = F->getReturnType()->isAggregateType();
+ bool HasAggrArg =
+ std::any_of(F->arg_begin(), F->arg_end(), [](Argument &Arg) {
+ return Arg.getType()->isAggregateType();
+ });
+ bool DoClone = IsRetAggr || HasAggrArg;
+ if (!DoClone)
+ return F;
+ SmallVector<std::pair<int, Type *>, 4> ChangedTypes;
+ Type *RetType = IsRetAggr ? B.getInt32Ty() : F->getReturnType();
+ if (IsRetAggr)
+ ChangedTypes.push_back(std::pair<int, Type *>(-1, F->getReturnType()));
+ SmallVector<Type *, 4> ArgTypes;
+ for (const auto &Arg : F->args()) {
+ if (Arg.getType()->isAggregateType()) {
+ ArgTypes.push_back(B.getInt32Ty());
+ ChangedTypes.push_back(
+ std::pair<int, Type *>(Arg.getArgNo(), Arg.getType()));
+ } else
+ ArgTypes.push_back(Arg.getType());
+ }
+ FunctionType *NewFTy =
+ FunctionType::get(RetType, ArgTypes, F->getFunctionType()->isVarArg());
+ Function *NewF =
+ Function::Create(NewFTy, F->getLinkage(), F->getName(), *F->getParent());
+
+ ValueToValueMapTy VMap;
+ auto NewFArgIt = NewF->arg_begin();
+ for (auto &Arg : F->args()) {
+ StringRef ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ }
+ SmallVector<ReturnInst *, 8> Returns;
+
+ CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly,
+ Returns);
+ NewF->takeName(F);
+
+ NamedMDNode *FuncMD =
+ F->getParent()->getOrInsertNamedMetadata("spv.cloned_funcs");
+ SmallVector<Metadata *, 2> MDArgs;
+ MDArgs.push_back(MDString::get(B.getContext(), NewF->getName()));
+ for (auto &ChangedTyP : ChangedTypes)
+ MDArgs.push_back(MDNode::get(
+ B.getContext(),
+ {ConstantAsMetadata::get(B.getInt32(ChangedTyP.first)),
+ ValueAsMetadata::get(Constant::getNullValue(ChangedTyP.second))}));
+ MDNode *ThisFuncMD = MDNode::get(B.getContext(), MDArgs);
+ FuncMD->addOperand(ThisFuncMD);
+
+ for (auto *U : make_early_inc_range(F->users())) {
+ if (auto *CI = dyn_cast<CallInst>(U))
+ CI->mutateFunctionType(NewF->getFunctionType());
+ U->replaceUsesOfWith(F, NewF);
+ }
+ return NewF;
+}
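A simplified sketch of the rewrite recorded in spv.cloned_funcs above: aggregate positions are swapped for i32 and remembered as (index, original type) pairs, with -1 standing for the return type. Type names are plain strings here purely for illustration:

#include <string>
#include <utility>
#include <vector>

using TypeName = std::string;

static std::vector<std::pair<int, TypeName>>
rewriteSignature(TypeName &Ret, std::vector<TypeName> &Args,
                 bool (*IsAggregate)(const TypeName &)) {
  std::vector<std::pair<int, TypeName>> Changed;
  if (IsAggregate(Ret)) {
    Changed.emplace_back(-1, Ret); // -1 marks the return type
    Ret = "i32";
  }
  for (int I = 0, E = static_cast<int>(Args.size()); I != E; ++I)
    if (IsAggregate(Args[I])) {
      Changed.emplace_back(I, Args[I]);
      Args[I] = "i32";
    }
  return Changed; // what ends up encoded in the spv.cloned_funcs metadata
}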
+
+std::string lowerLLVMIntrinsicName(IntrinsicInst *II) {
+ Function *IntrinsicFunc = II->getCalledFunction();
+ assert(IntrinsicFunc && "Missing function");
+ std::string FuncName = IntrinsicFunc->getName().str();
+ std::replace(FuncName.begin(), FuncName.end(), '.', '_');
+ FuncName = "spirv." + FuncName;
+ return FuncName;
+}
+
+static Function *getOrCreateFunction(Module *M, Type *RetTy,
+ ArrayRef<Type *> ArgTypes,
+ StringRef Name) {
+ FunctionType *FT = FunctionType::get(RetTy, ArgTypes, false);
+ Function *F = M->getFunction(Name);
+ if (F && F->getFunctionType() == FT)
+ return F;
+ Function *NewF = Function::Create(FT, GlobalValue::ExternalLinkage, Name, M);
+ if (F)
+ NewF->setDSOLocal(F->isDSOLocal());
+ NewF->setCallingConv(CallingConv::SPIR_FUNC);
+ return NewF;
+}
+
+static void lowerFunnelShifts(Module *M, IntrinsicInst *FSHIntrinsic) {
+ // Get a separate function - otherwise, we'd have to rework the CFG of the
+ // current one. Then simply replace the intrinsic uses with a call to the new
+ // function.
+ // Generate LLVM IR for i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i* %c)
+ FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
+ Type *FSHRetTy = FSHFuncTy->getReturnType();
+ const std::string FuncName = lowerLLVMIntrinsicName(FSHIntrinsic);
+ Function *FSHFunc =
+ getOrCreateFunction(M, FSHRetTy, FSHFuncTy->params(), FuncName);
+
+ if (!FSHFunc->empty()) {
+ FSHIntrinsic->setCalledFunction(FSHFunc);
+ return;
+ }
+ BasicBlock *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHFunc);
+ IRBuilder<> IRB(RotateBB);
+ Type *Ty = FSHFunc->getReturnType();
+ // Build the actual funnel shift rotate logic.
+ // In the comments, "int" is used interchangeably with "vector of int
+ // elements".
+ FixedVectorType *VectorTy = dyn_cast<FixedVectorType>(Ty);
+ Type *IntTy = VectorTy ? VectorTy->getElementType() : Ty;
+ unsigned BitWidth = IntTy->getIntegerBitWidth();
+ ConstantInt *BitWidthConstant = IRB.getInt({BitWidth, BitWidth});
+ Value *BitWidthForInsts =
+ VectorTy
+ ? IRB.CreateVectorSplat(VectorTy->getNumElements(), BitWidthConstant)
+ : BitWidthConstant;
+ Value *RotateModVal =
+ IRB.CreateURem(/*Rotate*/ FSHFunc->getArg(2), BitWidthForInsts);
+ Value *FirstShift = nullptr, *SecShift = nullptr;
+ if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr) {
+ // Shift the less significant number right, the "rotate" number of bits
+ // will be 0-filled on the left as a result of this regular shift.
+ FirstShift = IRB.CreateLShr(FSHFunc->getArg(1), RotateModVal);
+ } else {
+ // Shift the more significant number left, the "rotate" number of bits
+ // will be 0-filled on the right as a result of this regular shift.
+ FirstShift = IRB.CreateShl(FSHFunc->getArg(0), RotateModVal);
+ }
+ // We want the "rotate" number of the more significant int's LSBs (MSBs) to
+ // occupy the leftmost (rightmost) "0 space" left by the previous operation.
+ // Therefore, subtract the "rotate" number from the integer bitsize...
+ Value *SubRotateVal = IRB.CreateSub(BitWidthForInsts, RotateModVal);
+ if (FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr) {
+ // ...and left-shift the more significant int by this number, zero-filling
+ // the LSBs.
+ SecShift = IRB.CreateShl(FSHFunc->getArg(0), SubRotateVal);
+ } else {
+ // ...and right-shift the less significant int by this number, zero-filling
+ // the MSBs.
+ SecShift = IRB.CreateLShr(FSHFunc->getArg(1), SubRotateVal);
+ }
+ // A simple binary addition of the shifted ints yields the final result.
+ IRB.CreateRet(IRB.CreateOr(FirstShift, SecShift));
+
+ FSHIntrinsic->setCalledFunction(FSHFunc);
+}
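The rotate math the generated @spirv.llvm_fsh* body performs can be checked in isolation; a sketch of the fshl case for i32, with a zero-rotate guard added so the C++ shifts stay well defined (the guard is not part of the generated IR):

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t A, uint32_t B, uint32_t C) {
  uint32_t Rot = C % 32;         // RotateModVal
  if (Rot == 0)                  // avoid shifting by the full bit width
    return A;
  uint32_t Hi = A << Rot;        // FirstShift: A moves left, LSBs zero-filled
  uint32_t Lo = B >> (32 - Rot); // SecShift: top Rot bits of B become the LSBs
  return Hi | Lo;
}

int main() {
  assert(fshl32(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au);
  return 0;
}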
+
+static void buildUMulWithOverflowFunc(Module *M, Function *UMulFunc) {
+ // The function body is already created.
+ if (!UMulFunc->empty())
+ return;
+
+ BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", UMulFunc);
+ IRBuilder<> IRB(EntryBB);
+ // Build the actual unsigned multiplication logic with the overflow
+ // indication. Do unsigned multiplication Mul = A * B. Then check
+ // if unsigned division Div = Mul / A is not equal to B. If so,
+ // then overflow has happened.
+ Value *Mul = IRB.CreateNUWMul(UMulFunc->getArg(0), UMulFunc->getArg(1));
+ Value *Div = IRB.CreateUDiv(Mul, UMulFunc->getArg(0));
+ Value *Overflow = IRB.CreateICmpNE(UMulFunc->getArg(1), Div);
+
+ // umul.with.overflow intrinsic return a structure, where the first element
+ // is the multiplication result, and the second is an overflow bit.
+ Type *StructTy = UMulFunc->getReturnType();
+ Value *Agg = IRB.CreateInsertValue(UndefValue::get(StructTy), Mul, {0});
+ Value *Res = IRB.CreateInsertValue(Agg, Overflow, {1});
+ IRB.CreateRet(Res);
+}
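The division-based overflow test described in the comment can be tried out standalone; a sketch using 8-bit values so overflow is easy to trigger (the zero check avoids a division by zero, which the sketch simply treats as no overflow):

#include <cassert>
#include <cstdint>

static bool umulOverflows(uint8_t A, uint8_t B) {
  uint8_t Mul = static_cast<uint8_t>(A * B); // low 8 bits of the product
  return A != 0 && Mul / A != B;             // Div != B means the product wrapped
}

int main() {
  assert(!umulOverflows(10, 12)); // 120 fits in 8 bits
  assert(umulOverflows(20, 20));  // 400 wraps to 144; 144 / 20 == 7 != 20
  return 0;
}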
+
+static void lowerUMulWithOverflow(Module *M, IntrinsicInst *UMulIntrinsic) {
+ // Get a separate function - otherwise, we'd have to rework the CFG of the
+ // current one. Then simply replace the intrinsic uses with a call to the new
+ // function.
+ FunctionType *UMulFuncTy = UMulIntrinsic->getFunctionType();
+ Type *FSHLRetTy = UMulFuncTy->getReturnType();
+ const std::string FuncName = lowerLLVMIntrinsicName(UMulIntrinsic);
+ Function *UMulFunc =
+ getOrCreateFunction(M, FSHLRetTy, UMulFuncTy->params(), FuncName);
+ buildUMulWithOverflowFunc(M, UMulFunc);
+ UMulIntrinsic->setCalledFunction(UMulFunc);
+}
+
+static void substituteIntrinsicCalls(Module *M, Function *F) {
+ for (BasicBlock &BB : *F) {
+ for (Instruction &I : BB) {
+ auto Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ continue;
+ Call->setTailCall(false);
+ Function *CF = Call->getCalledFunction();
+ if (!CF || !CF->isIntrinsic())
+ continue;
+ auto *II = cast<IntrinsicInst>(Call);
+ if (II->getIntrinsicID() == Intrinsic::fshl ||
+ II->getIntrinsicID() == Intrinsic::fshr)
+ lowerFunnelShifts(M, II);
+ else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow)
+ lowerUMulWithOverflow(M, II);
+ }
+ }
+}
+
+bool SPIRVPrepareFunctions::runOnModule(Module &M) {
+ for (Function &F : M)
+ substituteIntrinsicCalls(&M, &F);
+
+ std::vector<Function *> FuncsWorklist;
+ bool Changed = false;
+ for (auto &F : M)
+ FuncsWorklist.push_back(&F);
+
+ for (auto *Func : FuncsWorklist) {
+ Function *F = processFunctionSignature(Func);
+
+ bool CreatedNewF = F != Func;
+
+ if (Func->isDeclaration()) {
+ Changed |= CreatedNewF;
+ continue;
+ }
+
+ if (CreatedNewF)
+ Func->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createSPIRVPrepareFunctionsPass() {
+ return new SPIRVPrepareFunctions();
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
index cdf3a160f373..00549c7b5768 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -46,8 +46,7 @@ SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU,
PointerSize(computePointerSize(TT)), SPIRVVersion(0), InstrInfo(),
FrameLowering(initSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
GR = std::make_unique<SPIRVGlobalRegistry>(PointerSize);
- CallLoweringInfo =
- std::make_unique<SPIRVCallLowering>(TLInfo, *this, GR.get());
+ CallLoweringInfo = std::make_unique<SPIRVCallLowering>(TLInfo, GR.get());
Legalizer = std::make_unique<SPIRVLegalizerInfo>(*this);
RegBankInfo = std::make_unique<SPIRVRegisterBankInfo>();
InstSelector.reset(
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index f7c88a5c6d4a..7f5f14dc3ce8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -140,7 +140,10 @@ TargetPassConfig *SPIRVTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SPIRVPassConfig(*this, PM);
}
-void SPIRVPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); }
+void SPIRVPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ addPass(createSPIRVPrepareFunctionsPass());
+}
void SPIRVPassConfig::addISelPrepare() {
addPass(createSPIRVEmitIntrinsicsPass(&getTM<SPIRVTargetMachine>()));
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index b92dc12735f8..15671ef3e512 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -45,6 +45,14 @@ static size_t getPaddedLen(const StringRef &Str) {
return (Len % 4 == 0) ? Len : Len + (4 - (Len % 4));
}
+void addStringImm(const StringRef &Str, MCInst &Inst) {
+ const size_t PaddedLen = getPaddedLen(Str);
+ for (unsigned i = 0; i < PaddedLen; i += 4) {
+ // Add an operand for the 32-bits of chars or padding.
+ Inst.addOperand(MCOperand::createImm(convertCharsToWord(Str, i)));
+ }
+}
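A standalone sketch of the word packing these addStringImm overloads emit (convertCharsToWord and the exact padded-length rule are not shown in this hunk, so the null-terminator handling below is an assumption):

#include <cstdint>
#include <string>
#include <vector>

// Packs Str into 32-bit little-endian words, zero-padded to a multiple of 4
// bytes including the terminating null.
static std::vector<uint32_t> packString(const std::string &Str) {
  size_t Len = Str.size() + 1;
  size_t PaddedLen = (Len % 4 == 0) ? Len : Len + (4 - Len % 4);
  std::vector<uint32_t> Words;
  for (size_t I = 0; I < PaddedLen; I += 4) {
    uint32_t Word = 0;
    for (size_t J = 0; J < 4 && I + J < Str.size(); ++J)
      Word |= static_cast<uint32_t>(static_cast<unsigned char>(Str[I + J]))
              << (8 * J);
    Words.push_back(Word);
  }
  return Words; // e.g. packString("abc") yields the single word 0x00636261
}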
+
void addStringImm(const StringRef &Str, MachineInstrBuilder &MIB) {
const size_t PaddedLen = getPaddedLen(Str);
for (unsigned i = 0; i < PaddedLen; i += 4) {
@@ -182,6 +190,24 @@ SPIRV::MemorySemantics getMemSemanticsForStorageClass(SPIRV::StorageClass SC) {
}
}
+SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
+ switch (Ord) {
+ case AtomicOrdering::Acquire:
+ return SPIRV::MemorySemantics::Acquire;
+ case AtomicOrdering::Release:
+ return SPIRV::MemorySemantics::Release;
+ case AtomicOrdering::AcquireRelease:
+ return SPIRV::MemorySemantics::AcquireRelease;
+ case AtomicOrdering::SequentiallyConsistent:
+ return SPIRV::MemorySemantics::SequentiallyConsistent;
+ case AtomicOrdering::Unordered:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::NotAtomic:
+ default:
+ return SPIRV::MemorySemantics::None;
+ }
+}
+
MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
const MachineRegisterInfo *MRI) {
MachineInstr *ConstInstr = MRI->getVRegDef(ConstReg);
@@ -202,6 +228,11 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {
return MI->getOperand(1).getCImm()->getValue().getZExtValue();
}
+bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) {
+ return MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
+ MI.getIntrinsicID() == IntrinsicID;
+}
+
Type *getMDOperandAsType(const MDNode *N, unsigned I) {
return cast<ValueAsMetadata>(N->getOperand(I))->getType();
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index ffa82c9c1fe4..35e24b076570 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -32,6 +32,7 @@ class SPIRVInstrInfo;
// Add the given string as a series of integer operands, inserting null
// terminators and padding so that the operands form complete 32-bit
// little-endian words.
+void addStringImm(const llvm::StringRef &Str, llvm::MCInst &Inst);
void addStringImm(const llvm::StringRef &Str, llvm::MachineInstrBuilder &MIB);
void addStringImm(const llvm::StringRef &Str, llvm::IRBuilder<> &B,
std::vector<llvm::Value *> &Args);
@@ -67,6 +68,8 @@ llvm::SPIRV::StorageClass addressSpaceToStorageClass(unsigned AddrSpace);
llvm::SPIRV::MemorySemantics
getMemSemanticsForStorageClass(llvm::SPIRV::StorageClass SC);
+llvm::SPIRV::MemorySemantics getMemSemantics(llvm::AtomicOrdering Ord);
+
// Find the def instruction for the given ConstReg, walking through
// spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg to the
// def of the OpConstant instruction.
@@ -78,6 +81,9 @@ getDefInstrMaybeConstant(llvm::Register &ConstReg,
uint64_t getIConstVal(llvm::Register ConstReg,
const llvm::MachineRegisterInfo *MRI);
+// Check if MI is a SPIR-V specific intrinsic call.
+bool isSpvIntrinsic(llvm::MachineInstr &MI, llvm::Intrinsic::ID IntrinsicID);
+
// Get type of i-th operand of the metadata node.
llvm::Type *getMDOperandAsType(const llvm::MDNode *N, unsigned I);
#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
diff --git a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index 1138788ac7fa..1f8837eb0194 100644
--- a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -24,10 +24,10 @@ Target &llvm::getTheSparcelTarget() {
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTargetInfo() {
- RegisterTarget<Triple::sparc, /*HasJIT=*/true> X(getTheSparcTarget(), "sparc",
- "Sparc", "Sparc");
- RegisterTarget<Triple::sparcv9, /*HasJIT=*/true> Y(
+ RegisterTarget<Triple::sparc, /*HasJIT=*/false> X(getTheSparcTarget(),
+ "sparc", "Sparc", "Sparc");
+ RegisterTarget<Triple::sparcv9, /*HasJIT=*/false> Y(
getTheSparcV9Target(), "sparcv9", "Sparc V9", "Sparc");
- RegisterTarget<Triple::sparcel, /*HasJIT=*/true> Z(
+ RegisterTarget<Triple::sparcel, /*HasJIT=*/false> Z(
getTheSparcelTarget(), "sparcel", "Sparc LE", "Sparc");
}
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
index 9c73757d7f5c..86eb8365d527 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -28,7 +28,3 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = {
const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = {
SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
};
-
-const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = {
- SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
- SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31};
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index f82c61c0f344..387411942aba 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -27,9 +27,6 @@ namespace SystemZ {
const unsigned XPLINK64NumArgFPRs = 4;
extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs];
-
- const unsigned XPLINK64NumArgVRs = 8;
- extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs];
} // end namespace SystemZ
class SystemZCCState : public CCState {
@@ -205,41 +202,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
return false;
}
-inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- ArrayRef<MCPhysReg> RegList;
-
- switch (LocVT.SimpleTy) {
- case MVT::i64:
- RegList = SystemZ::XPLINK64ArgGPRs;
- break;
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v2f64:
- RegList = SystemZ::XPLINK64ArgVRs;
- break;
- case MVT::f32:
- case MVT::f64:
- case MVT::f128:
- RegList = SystemZ::XPLINK64ArgFPRs;
- break;
- default:
- return false;
- }
-
- unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList);
- // Every time we can allocate a register, allocate on the stack.
- if (UnallocatedRegisterIndex < RegList.size())
- State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8));
-
- return false;
-}
-
inline bool RetCC_SystemZ_Error(unsigned &, MVT &, MVT &,
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
CCState &) {
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index fdd82a01f211..29b4a26736b2 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -221,9 +221,10 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// XPLINK64 ABI compliant code widens integral types smaller than i64
// to i64 before placing the parameters either on the stack or in registers.
CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
- // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRS.
- CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
- CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
+ // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs.
+ // Although we assign the f32 vararg to be bitcast, it will first be promoted
+ // to an f64 within convertValVTToLocVT().
+ CCIfType<[f32, f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
// long double, can only be passed in GPR2 and GPR3, if available,
// hence R2Q
CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
@@ -246,34 +247,29 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// The first 3 integer arguments are passed in registers R1D-R3D.
// The rest will be passed in the user area. The address offset of the user
// area can be found in register R4D.
- CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>,
- CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>,
+ CCIfType<[i64], CCAssignToRegAndStack<[R1D, R2D, R3D], 8, 8>>,
- // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
+ // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
// are passed in the same way, but they're widened to one of these types
// during type legalization.
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
- CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>,
+ CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfFixed<CCAssignToReg<[V24, V25, V26, V27,
- V28, V29, V30, V31]>>>>,
+ CCIfFixed<CCAssignToRegAndStack<[V24, V25, V26, V27,
+ V28, V29, V30, V31], 16, 8>>>>,
- // The first 4 named float and double arguments are passed in registers FPR0-FPR6.
- // The rest will be passed in the user area.
+ // The first 4 named float and double arguments are passed in registers
+ // FPR0-FPR6. The rest will be passed in the user area.
CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
- CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
- CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>,
- CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>,
+ CCIfType<[f32], CCIfFixed<CCAssignToRegAndStack<[F0S, F2S, F4S, F6S], 4, 8>>>,
+ CCIfType<[f64], CCIfFixed<CCAssignToRegAndStack<[F0D, F2D, F4D, F6D], 8, 8>>>,
+
// The first 2 long double arguments are passed in register FPR0/FPR2
// and FPR4/FPR6. The rest will be passed in the user area.
CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
- CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
- CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>,
+ CCIfType<[f128], CCIfFixed<CCAssignToRegAndStack<[F0Q, F4Q], 16, 8>>>,
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 975eb8862e82..d943507b4112 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -911,6 +911,54 @@ SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
XPLINKSpillOffsetTable[I].Offset;
}
+// Checks if the function is a potential candidate for being an XPLeaf routine.
+static bool isXPLeafCandidate(const MachineFunction &MF) {
+ const MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto *Regs =
+ static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters());
+
+ // If the function calls other functions, including alloca, then it is not
+ // an XPLeaf routine.
+ if (MFFrame.hasCalls())
+ return false;
+
+ // If the function has variable-sized objects, it is not an XPLeaf routine.
+ if (MFFrame.hasVarSizedObjects())
+ return false;
+
+ // If the function adjusts the stack, then it is not an XPLeaf routine.
+ if (MFFrame.adjustsStack())
+ return false;
+
+ // If the function modifies the stack pointer register, then it is not an
+ // XPLeaf routine.
+ if (MRI.isPhysRegModified(Regs->getStackPointerRegister()))
+ return false;
+
+ // If the function modifies the ADA register, it is not an XPLeaf routine.
+ if (MRI.isPhysRegModified(Regs->getAddressOfCalleeRegister()))
+ return false;
+
+ // If the function modifies the return address register, then it is not an
+ // XPLeaf routine.
+ if (MRI.isPhysRegModified(Regs->getReturnFunctionAddressRegister()))
+ return false;
+
+ // If the backchain pointer should be stored, it is not an XPLeaf routine.
+ if (MF.getFunction().hasFnAttribute("backchain"))
+ return false;
+
+ // If the function acquires its own stack frame, then it is not an XPLeaf
+ // routine. At the time this function is called, only slots for local
+ // variables have been allocated, so this is a very rough estimate.
+ if (MFFrame.estimateStackSize(MF) > 0)
+ return false;
+
+ return true;
+}
+
bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const {
@@ -920,6 +968,18 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
auto &GRRegClass = SystemZ::GR64BitRegClass;
+ // At this point, the result of isXPLeafCandidate() is not accurate because
+ // the size of the save area has not yet been determined. If
+ // isXPLeafCandidate() indicates a potential leaf function and there are no
+ // callee-saved registers, then it is indeed a leaf function, and we can
+ // exit early.
+ // TODO: It is possible for leaf functions to use callee-saved registers.
+ // They can use the 0-2k range between R4 and the caller's stack frame
+ // without acquiring their own stack frame.
+ bool IsLeaf = CSI.empty() && isXPLeafCandidate(MF);
+ if (IsLeaf)
+ return true;
+
// For non-leaf functions:
// - the address of callee (entry point) register R6 must be saved
CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
@@ -1137,16 +1197,16 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
MachineInstr *StoreInstr = nullptr;
+
+ determineFrameLayout(MF);
+
bool HasFP = hasFP(MF);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
uint64_t Offset = 0;
- // TODO: Support leaf functions; only add size of save+reserved area when
- // function is non-leaf.
- MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize());
- uint64_t StackSize = MFFrame.getStackSize();
+ const uint64_t StackSize = MFFrame.getStackSize();
if (ZFI->getSpillGPRRegs().LowGPR) {
// Skip over the GPR saves.
@@ -1213,8 +1273,8 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R8 as live on entry when
// saving the GPRs.)
- for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
- I->addLiveIn(Regs.getFramePointerRegister());
+ for (MachineBasicBlock &B : llvm::drop_begin(MF))
+ B.addLiveIn(Regs.getFramePointerRegister());
}
}
@@ -1321,3 +1381,32 @@ void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
// Setup stack frame offset
MFFrame.setOffsetAdjustment(Regs.getStackPointerBias());
}
+
+// Determines the size of the frame, and creates the deferred spill objects.
+void SystemZXPLINKFrameLowering::determineFrameLayout(
+ MachineFunction &MF) const {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto *Regs =
+ static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters());
+
+ uint64_t StackSize = MFFrame.getStackSize();
+ if (StackSize == 0)
+ return;
+
+ // Add the size of the register save area and the reserved area to the size.
+ StackSize += Regs->getCallFrameSize();
+ MFFrame.setStackSize(StackSize);
+
+ // We now know the stack size. Create the fixed spill stack objects for the
+ // register save area now. This has no impact on the stack frame layout,
+ // which is already computed. However, it makes sure that all callee-saved
+ // registers have a valid frame index assigned.
+ const unsigned RegSize = MF.getDataLayout().getPointerSize();
+ for (auto &CS : MFFrame.getCalleeSavedInfo()) {
+ int Offset = RegSpillOffsets[CS.getReg()];
+ if (Offset >= 0)
+ CS.setFrameIdx(
+ MFFrame.CreateFixedSpillStackObject(RegSize, Offset - StackSize));
+ }
+}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index bec83a9457e0..95f30e3c0d99 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -134,6 +134,8 @@ public:
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
+
+ void determineFrameLayout(MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 42c1c77f14e4..ac4531262187 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1404,8 +1404,12 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::BCvt: {
assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
- assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
- VA.getValVT() == MVT::f128);
+ assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
+ VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
+ // For an f32 vararg we need to first promote it to an f64 and then
+ // bitcast it to an i64.
+ if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
+ Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
? MVT::v2i64
: VA.getLocVT();
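A standalone illustration of what the f32 vararg path above does to a value on its way into an XPLINK64 GPR slot: promote to f64, then reuse the same bits as an i64 (the helper name is made up for the example):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t f32VarArgToGPR(float F) {
  double D = static_cast<double>(F); // FP_EXTEND
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits)); // BCvt: same bits in an i64 location
  return Bits;
}

int main() {
  assert(f32VarArgToGPR(1.0f) == 0x3FF0000000000000ULL);
  return 0;
}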
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 94ebb59c4c77..46bb85606a62 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -418,7 +418,9 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
if (MI.getOpcode() == VE::LDrii || // I64
MI.getOpcode() == VE::LDLSXrii || // I32
MI.getOpcode() == VE::LDUrii || // F32
- MI.getOpcode() == VE::LDQrii // F128 (pseudo)
+ MI.getOpcode() == VE::LDQrii || // F128 (pseudo)
+ MI.getOpcode() == VE::LDVMrii || // VM (pseudo)
+ MI.getOpcode() == VE::LDVM512rii // VM512 (pseudo)
) {
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() &&
@@ -437,10 +439,12 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
/// any side effects other than storing to the stack slot.
unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
- if (MI.getOpcode() == VE::STrii || // I64
- MI.getOpcode() == VE::STLrii || // I32
- MI.getOpcode() == VE::STUrii || // F32
- MI.getOpcode() == VE::STQrii // F128 (pseudo)
+ if (MI.getOpcode() == VE::STrii || // I64
+ MI.getOpcode() == VE::STLrii || // I32
+ MI.getOpcode() == VE::STUrii || // F32
+ MI.getOpcode() == VE::STQrii || // F128 (pseudo)
+ MI.getOpcode() == VE::STVMrii || // VM (pseudo)
+ MI.getOpcode() == VE::STVM512rii // VM512 (pseudo)
) {
if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() &&
@@ -496,6 +500,20 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addImm(0)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO);
+ } else if (RC == &VE::VMRegClass) {
+ BuildMI(MBB, I, DL, get(VE::STVMrii))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
+ } else if (VE::VM512RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(VE::STVM512rii))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO);
} else
report_fatal_error("Can't store this register to stack slot");
}
@@ -539,6 +557,18 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(0)
.addImm(0)
.addMemOperand(MMO);
+ } else if (RC == &VE::VMRegClass) {
+ BuildMI(MBB, I, DL, get(VE::LDVMrii), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addMemOperand(MMO);
+ } else if (VE::VM512RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(VE::LDVM512rii), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addImm(0)
+ .addMemOperand(MMO);
} else
report_fatal_error("Can't load this register from stack slot");
}
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index 71199717a3a2..0b2f5039e3f3 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -25,6 +25,20 @@ def: Pat<(i64 (repl_i32 i32:$val)),
(zero_f32 (i2l $val)),
(SLLri (i2l $val), 32))>;
+///// Mask Load & Store /////
+
+// Loads and stores of v256i1 and v512i1 are implemented in two ways. The
+// STVM/STVM512 pseudos are used for frame-index-related load/store
+// instructions; custom lowering is used for all other load/store instructions.
+
+def : Pat<(v256i1 (load ADDRrii:$addr)),
+ (LDVMrii ADDRrii:$addr)>;
+def : Pat<(v512i1 (load ADDRrii:$addr)),
+ (LDVM512rii ADDRrii:$addr)>;
+def : Pat<(store v256i1:$vx, ADDRrii:$addr),
+ (STVMrii ADDRrii:$addr, $vx)>;
+def : Pat<(store v512i1:$vx, ADDRrii:$addr),
+ (STVM512rii ADDRrii:$addr, $vx)>;
multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
SDNodeXForm ImmCast, OutPatFrag SuperRegCast> {
diff --git a/llvm/lib/Target/VE/VEInstrVec.td b/llvm/lib/Target/VE/VEInstrVec.td
index 4a8476f7288a..327ad9ceacc5 100644
--- a/llvm/lib/Target/VE/VEInstrVec.td
+++ b/llvm/lib/Target/VE/VEInstrVec.td
@@ -2,6 +2,33 @@
// Vector Instructions
//===----------------------------------------------------------------------===//
+// Pseudo instructions for VM/VM512 spill/restore
+//
+// These pseudo instructions are used only for spill/restore, since
+// InlineSpiller assumes that storeRegToStackSlot/loadRegFromStackSlot
+// emit a single instruction. Those functions therefore emit either a
+// single real store/load instruction or one of these pseudo store/load
+// instructions.
+//
+// Specifies hasSideEffects = 0 to disable UnmodeledSideEffects.
+
+let mayLoad = 1, hasSideEffects = 0 in {
+def LDVMrii : Pseudo<
+ (outs VM:$vmx), (ins MEMrii:$addr),
+ "# pseudo ldvm $vmx, $addr", []>;
+def LDVM512rii : Pseudo<
+ (outs VM512:$vmx), (ins MEMrii:$addr),
+ "# pseudo ldvm512 $vmx, $addr", []>;
+}
+let mayStore = 1, hasSideEffects = 0 in {
+def STVMrii : Pseudo<
+ (outs), (ins MEMrii:$addr, VM:$vmx),
+ "# pseudo stvm $addr, $vmx", []>;
+def STVM512rii : Pseudo<
+ (outs), (ins MEMrii:$addr, VM512:$vmx),
+ "# pseudo stvm512 $addr, $vmx", []>;
+}
+
//===----------------------------------------------------------------------===//
// Pseudo instructions for VM512 modifications
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index f334af128162..397ea09c9a02 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -180,6 +180,16 @@ class EliminateFrameIndex {
int FIOperandNum);
void processLDQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
int FIOperandNum);
+ // Expand and eliminate Frame Index of pseudo STVMrii and LDVMrii.
+ void processSTVM(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
+ void processLDVM(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
+ // Expand and eliminate Frame Index of pseudo STVM512rii and LDVM512rii.
+ void processSTVM512(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
+ void processLDVM512(MachineInstr &MI, Register FrameReg, int64_t Offset,
+ int FIOperandNum);
public:
EliminateFrameIndex(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
@@ -271,6 +281,185 @@ void EliminateFrameIndex::processLDQ(MachineInstr &MI, Register FrameReg,
replaceFI(MI, FrameReg, Offset, FIOperandNum);
}
+void EliminateFrameIndex::processSTVM(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ assert(MI.getOpcode() == VE::STVMrii);
+ LLVM_DEBUG(dbgs() << "processSTVM: "; MI.dump());
+
+ // Original MI is:
+ // STVMrii frame-index, 0, offset, reg (, memory operand)
+ // Convert it to:
+ // SVMi tmp-reg, reg, 0
+ // STrii frame-reg, 0, offset, tmp-reg
+ // SVMi tmp-reg, reg, 1
+ // STrii frame-reg, 0, offset+8, tmp-reg
+ // SVMi tmp-reg, reg, 2
+ // STrii frame-reg, 0, offset+16, tmp-reg
+ // SVMi tmp-reg, reg, 3
+ // STrii frame-reg, 0, offset+24, tmp-reg
+
+ prepareReplaceFI(MI, FrameReg, Offset, 24);
+
+ Register SrcReg = MI.getOperand(3).getReg();
+ bool isKill = MI.getOperand(3).isKill();
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving SX16 all of the time.
+ Register TmpReg = VE::SX16;
+ for (int i = 0; i < 3; ++i) {
+ build(VE::SVMmr, TmpReg).addReg(SrcReg).addImm(i);
+ MachineInstr *StMI =
+ build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg(
+ TmpReg, getKillRegState(true));
+ replaceFI(*StMI, FrameReg, Offset, 0);
+ Offset += 8;
+ }
+ build(VE::SVMmr, TmpReg).addReg(SrcReg, getKillRegState(isKill)).addImm(3);
+ MI.setDesc(get(VE::STrii));
+ MI.getOperand(3).ChangeToRegister(TmpReg, false, false, true);
+ replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
+void EliminateFrameIndex::processLDVM(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ assert(MI.getOpcode() == VE::LDVMrii);
+ LLVM_DEBUG(dbgs() << "processLDVM: "; MI.dump());
+
+ // Original MI is:
+ // LDVMrii reg, frame-index, 0, offset (, memory operand)
+ // Convert it to:
+ // LDrii tmp-reg, frame-reg, 0, offset
+ // LVMir vm, 0, tmp-reg
+ // LDrii tmp-reg, frame-reg, 0, offset+8
+ // LVMir_m vm, 1, tmp-reg, vm
+ // LDrii tmp-reg, frame-reg, 0, offset+16
+ // LVMir_m vm, 2, tmp-reg, vm
+ // LDrii tmp-reg, frame-reg, 0, offset+24
+ // LVMir_m vm, 3, tmp-reg, vm
+
+ prepareReplaceFI(MI, FrameReg, Offset, 24);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving SX16 all of the time.
+ unsigned TmpReg = VE::SX16;
+ for (int i = 0; i < 4; ++i) {
+ if (i != 3) {
+ MachineInstr *StMI =
+ build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0);
+ replaceFI(*StMI, FrameReg, Offset, 1);
+ Offset += 8;
+ } else {
+ // The last LDrii replaces the target instruction.
+ MI.setDesc(get(VE::LDrii));
+ MI.getOperand(0).ChangeToRegister(TmpReg, true);
+ }
+ // The first LVM is LVMir; the others are LVMir_m. The last LVM is placed
+ // right after the target instruction.
+ if (i == 0)
+ build(VE::LVMir, DestReg).addImm(i).addReg(TmpReg, getKillRegState(true));
+ else if (i != 3)
+ build(VE::LVMir_m, DestReg)
+ .addImm(i)
+ .addReg(TmpReg, getKillRegState(true))
+ .addReg(DestReg);
+ else
+ BuildMI(*MI.getParent(), std::next(II), DL, get(VE::LVMir_m), DestReg)
+ .addImm(3)
+ .addReg(TmpReg, getKillRegState(true))
+ .addReg(DestReg);
+ }
+ replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
+void EliminateFrameIndex::processSTVM512(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ assert(MI.getOpcode() == VE::STVM512rii);
+ LLVM_DEBUG(dbgs() << "processSTVM512: "; MI.dump());
+
+ prepareReplaceFI(MI, FrameReg, Offset, 56);
+
+ Register SrcReg = MI.getOperand(3).getReg();
+ Register SrcLoReg = getSubReg(SrcReg, VE::sub_vm_odd);
+ Register SrcHiReg = getSubReg(SrcReg, VE::sub_vm_even);
+ bool isKill = MI.getOperand(3).isKill();
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving SX16 all of the time.
+ Register TmpReg = VE::SX16;
+ // store low part of VMP
+ MachineInstr *LastMI = nullptr;
+ for (int i = 0; i < 4; ++i) {
+ LastMI = build(VE::SVMmr, TmpReg).addReg(SrcLoReg).addImm(i);
+ MachineInstr *StMI =
+ build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg(
+ TmpReg, getKillRegState(true));
+ replaceFI(*StMI, FrameReg, Offset, 0);
+ Offset += 8;
+ }
+ if (isKill)
+ LastMI->addRegisterKilled(SrcLoReg, &TRI, true);
+ // store high part of VMP
+ for (int i = 0; i < 3; ++i) {
+ build(VE::SVMmr, TmpReg).addReg(SrcHiReg).addImm(i);
+ MachineInstr *StMI =
+ build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg(
+ TmpReg, getKillRegState(true));
+ replaceFI(*StMI, FrameReg, Offset, 0);
+ Offset += 8;
+ }
+ LastMI = build(VE::SVMmr, TmpReg).addReg(SrcHiReg).addImm(3);
+ if (isKill) {
+ LastMI->addRegisterKilled(SrcHiReg, &TRI, true);
+ // Add implicit super-register kills to the particular MI.
+ LastMI->addRegisterKilled(SrcReg, &TRI, true);
+ }
+ MI.setDesc(get(VE::STrii));
+ MI.getOperand(3).ChangeToRegister(TmpReg, false, false, true);
+ replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
+void EliminateFrameIndex::processLDVM512(MachineInstr &MI, Register FrameReg,
+ int64_t Offset, int FIOperandNum) {
+ assert(MI.getOpcode() == VE::LDVM512rii);
+ LLVM_DEBUG(dbgs() << "processLDVM512: "; MI.dump());
+
+ prepareReplaceFI(MI, FrameReg, Offset, 56);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register DestLoReg = getSubReg(DestReg, VE::sub_vm_odd);
+ Register DestHiReg = getSubReg(DestReg, VE::sub_vm_even);
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving SX16 all of the time.
+ Register TmpReg = VE::SX16;
+ build(VE::IMPLICIT_DEF, DestReg);
+ for (int i = 0; i < 4; ++i) {
+ MachineInstr *LdMI =
+ build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0);
+ replaceFI(*LdMI, FrameReg, Offset, 1);
+ build(VE::LVMir_m, DestLoReg)
+ .addImm(i)
+ .addReg(TmpReg, getKillRegState(true))
+ .addReg(DestLoReg);
+ Offset += 8;
+ }
+ for (int i = 0; i < 3; ++i) {
+ MachineInstr *LdMI =
+ build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0);
+ replaceFI(*LdMI, FrameReg, Offset, 1);
+ build(VE::LVMir_m, DestHiReg)
+ .addImm(i)
+ .addReg(TmpReg, getKillRegState(true))
+ .addReg(DestHiReg);
+ Offset += 8;
+ }
+ MI.setDesc(get(VE::LDrii));
+ MI.getOperand(0).ChangeToRegister(TmpReg, true);
+ BuildMI(*MI.getParent(), std::next(II), DL, get(VE::LVMir_m), DestHiReg)
+ .addImm(3)
+ .addReg(TmpReg, getKillRegState(true))
+ .addReg(DestHiReg);
+ replaceFI(MI, FrameReg, Offset, FIOperandNum);
+}
+
void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg,
int64_t Offset, int FIOperandNum) {
switch (MI.getOpcode()) {
@@ -280,6 +469,18 @@ void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg,
case VE::LDQrii:
processLDQ(MI, FrameReg, Offset, FIOperandNum);
return;
+ case VE::STVMrii:
+ processSTVM(MI, FrameReg, Offset, FIOperandNum);
+ return;
+ case VE::LDVMrii:
+ processLDVM(MI, FrameReg, Offset, FIOperandNum);
+ return;
+ case VE::STVM512rii:
+ processSTVM512(MI, FrameReg, Offset, FIOperandNum);
+ return;
+ case VE::LDVM512rii:
+ processLDVM512(MI, FrameReg, Offset, FIOperandNum);
+ return;
}
prepareReplaceFI(MI, FrameReg, Offset);
replaceFI(MI, FrameReg, Offset, FIOperandNum);
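(As an aside on the spill layout implemented above: a 512-bit mask pair is written out as eight 64-bit words at consecutive 8-byte offsets from the frame slot, and reloaded the same way, which is why prepareReplaceFI is told the extra range of 56 bytes. The standalone C++ sketch below models only that offset arithmetic; the Store and spillVM512 names are hypothetical and are not part of the VE backend.)

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical model of the spill layout used by processSTVM512 above:
// a 512-bit mask pair is treated as eight 64-bit words, each stored at
// BaseOffset, BaseOffset + 8, ..., BaseOffset + 56 from the frame slot.
struct Store {
  int64_t Offset; // byte offset from the frame register
  uint64_t Word;  // the 64-bit chunk written at that offset
};

static std::vector<Store> spillVM512(const uint64_t (&Words)[8],
                                     int64_t BaseOffset) {
  std::vector<Store> Stores;
  for (int I = 0; I < 8; ++I) {
    Stores.push_back({BaseOffset, Words[I]});
    BaseOffset += 8; // matches the "Offset += 8" stepping in the code above
  }
  return Stores;
}

int main() {
  uint64_t Words[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  for (const Store &S : spillVM512(Words, 0))
    std::printf("store word %llu at frame+%lld\n",
                (unsigned long long)S.Word, (long long)S.Offset);
  return 0;
}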
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 9316826e3d92..d7720604d6dc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -40,7 +40,7 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
RI(STI.getTargetTriple()) {}
bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable(
- const MachineInstr &MI, AAResults *AA) const {
+ const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case WebAssembly::CONST_I32:
case WebAssembly::CONST_I64:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index f45a3792467a..29d700bdf83f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -43,8 +43,7 @@ public:
const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; }
- bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index d3ad47147ac8..f9ef45bfb41c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -49,7 +49,6 @@ class WebAssemblyRegStackify final : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<MachineBlockFrequencyInfo>();
@@ -164,15 +163,15 @@ static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write,
// Determine whether MI reads memory, writes memory, has side effects,
// and/or uses the stack pointer value.
-static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
- bool &Write, bool &Effects, bool &StackPointer) {
+static void query(const MachineInstr &MI, bool &Read, bool &Write,
+ bool &Effects, bool &StackPointer) {
assert(!MI.isTerminator());
if (MI.isDebugInstr() || MI.isPosition())
return;
// Check for loads.
- if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA))
+ if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad())
Read = true;
// Check for stores.
@@ -255,9 +254,9 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
}
// Test whether Def is safe and profitable to rematerialize.
-static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
+static bool shouldRematerialize(const MachineInstr &Def,
const WebAssemblyInstrInfo *TII) {
- return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
+ return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def);
}
// Identify the definition for this register at this point. This is a
@@ -311,7 +310,7 @@ static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
- const MachineInstr *Insert, AliasAnalysis &AA,
+ const MachineInstr *Insert,
const WebAssemblyFunctionInfo &MFI,
const MachineRegisterInfo &MRI) {
const MachineInstr *DefI = Def->getParent();
@@ -391,7 +390,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
}
bool Read = false, Write = false, Effects = false, StackPointer = false;
- query(*DefI, AA, Read, Write, Effects, StackPointer);
+ query(*DefI, Read, Write, Effects, StackPointer);
// If the instruction does not access memory and has no side effects, it has
// no additional dependencies.
@@ -406,7 +405,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use,
bool InterveningWrite = false;
bool InterveningEffects = false;
bool InterveningStackPointer = false;
- query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
+ query(*I, InterveningRead, InterveningWrite, InterveningEffects,
InterveningStackPointer);
if (Effects && InterveningEffects)
return false;
@@ -808,7 +807,6 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &MDT = getAnalysis<MachineDominatorTree>();
auto &LIS = getAnalysis<LiveIntervals>();
@@ -872,8 +870,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// supports intra-block moves) and it's MachineSink's job to catch all
// the sinking opportunities anyway.
bool SameBlock = DefI->getParent() == &MBB;
- bool CanMove = SameBlock &&
- isSafeToMove(Def, &Use, Insert, AA, MFI, MRI) &&
+ bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI) &&
!TreeWalker.isOnStack(Reg);
if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) {
Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI);
@@ -883,7 +880,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// TODO: Encode this properly as a stackified value.
if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg)
MFI.clearFrameBaseVreg();
- } else if (shouldRematerialize(*DefI, AA, TII)) {
+ } else if (shouldRematerialize(*DefI, TII)) {
Insert =
rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(),
LIS, MFI, MRI, TII, TRI);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a859176220c7..fa0a6bd415dc 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1277,7 +1277,7 @@ class ProcModel<string Name, SchedMachineModel Model,
// enabled. It has no effect on code generation.
// NOTE: As a default tuning, "generic" aims to produce code optimized for the
// most common X86 processors. The tunings might be changed over time. It is
-// recommended to use "x86-64" in lit tests for consistency.
+// recommended to use the "tune-cpu"="x86-64" function attribute for consistency.
def : ProcModel<"generic", SandyBridgeModel,
[FeatureX87, FeatureCX8, FeatureX86_64],
[TuningSlow3OpsLEA,
diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index 16bff201dd03..db6923416177 100644
--- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -393,12 +393,12 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
switch (MI->getOpcode()) {
case X86::MOV8rm:
- // Only replace 8 bit loads with the zero extending versions if
- // in an inner most loop and not optimizing for size. This takes
- // an extra byte to encode, and provides limited performance upside.
- if (MachineLoop *ML = MLI->getLoopFor(&MBB))
- if (ML->begin() == ML->end() && !OptForSize)
- return tryReplaceLoad(X86::MOVZX32rm8, MI);
+    // Replace 8-bit loads with the zero-extending version when not optimizing
+    // for size. The extending op is cheaper across a wide range of
+    // microarchitectures and avoids a potentially expensive partial-register
+    // stall. It takes an extra byte to encode, however, so don't do this when
+    // optimizing for size.
+ if (!OptForSize)
+ return tryReplaceLoad(X86::MOVZX32rm8, MI);
break;
case X86::MOV16rm:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 12af6087cb47..5a4533c4bac4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -555,6 +555,39 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+ auto setF16Action = [&] (MVT VT, LegalizeAction Action) {
+ setOperationAction(ISD::FABS, VT, Action);
+ setOperationAction(ISD::FNEG, VT, Action);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Action);
+ setOperationAction(ISD::FMA, VT, Action);
+ setOperationAction(ISD::FMINNUM, VT, Action);
+ setOperationAction(ISD::FMAXNUM, VT, Action);
+ setOperationAction(ISD::FMINIMUM, VT, Action);
+ setOperationAction(ISD::FMAXIMUM, VT, Action);
+ setOperationAction(ISD::FSIN, VT, Action);
+ setOperationAction(ISD::FCOS, VT, Action);
+ setOperationAction(ISD::FSINCOS, VT, Action);
+ setOperationAction(ISD::FSQRT, VT, Action);
+ setOperationAction(ISD::FPOW, VT, Action);
+ setOperationAction(ISD::FLOG, VT, Action);
+ setOperationAction(ISD::FLOG2, VT, Action);
+ setOperationAction(ISD::FLOG10, VT, Action);
+ setOperationAction(ISD::FEXP, VT, Action);
+ setOperationAction(ISD::FEXP2, VT, Action);
+ setOperationAction(ISD::FCEIL, VT, Action);
+ setOperationAction(ISD::FFLOOR, VT, Action);
+ setOperationAction(ISD::FNEARBYINT, VT, Action);
+ setOperationAction(ISD::FRINT, VT, Action);
+ setOperationAction(ISD::BR_CC, VT, Action);
+ setOperationAction(ISD::SETCC, VT, Action);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Action);
+ setOperationAction(ISD::FROUND, VT, Action);
+ setOperationAction(ISD::FROUNDEVEN, VT, Action);
+ setOperationAction(ISD::FTRUNC, VT, Action);
+ };
+
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
// f16, f32 and f64 use SSE.
// Set up the FP register classes.
@@ -592,40 +625,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Half type will be promoted by default.
- setOperationAction(ISD::FABS, MVT::f16, Promote);
- setOperationAction(ISD::FNEG, MVT::f16, Promote);
- setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+ setF16Action(MVT::f16, Promote);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FMA, MVT::f16, Promote);
- setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSQRT, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::BR_CC, MVT::f16, Promote);
- setOperationAction(ISD::SETCC, MVT::f16, Promote);
- setOperationAction(ISD::SELECT, MVT::f16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall);
setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
@@ -1003,6 +1007,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
: &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
+ addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
@@ -1084,7 +1090,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
- for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
+ for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -1095,19 +1101,25 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
+ setF16Action(MVT::v8f16, Expand);
+ setOperationAction(ISD::FADD, MVT::v8f16, Expand);
+ setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
+ setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v8f16, Custom);
setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
// Custom legalize these to avoid over promotion or custom promotion.
@@ -1118,8 +1130,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
}
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
@@ -1304,6 +1316,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
: &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
+ addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
: &X86::VR256RegClass);
addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
@@ -1340,12 +1354,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
@@ -1356,7 +1372,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
@@ -1386,6 +1401,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
+ setOperationAction(ISD::SELECT, MVT::v16f16, Custom);
setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
@@ -1507,7 +1523,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Custom lower several nodes for 256-bit types.
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v8f32, MVT::v4f64 }) {
+ MVT::v16f16, MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -1518,6 +1534,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
}
+ setF16Action(MVT::v16f16, Expand);
+ setOperationAction(ISD::FADD, MVT::v16f16, Expand);
+ setOperationAction(ISD::FSUB, MVT::v16f16, Expand);
+ setOperationAction(ISD::FMUL, MVT::v16f16, Expand);
+ setOperationAction(ISD::FDIV, MVT::v16f16, Expand);
if (HasInt256) {
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
@@ -1532,11 +1553,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
- if (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) {
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+ if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() &&
+ Subtarget.hasF16C()) {
+ for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {
+ setOperationAction(ISD::FP_ROUND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
+ }
+ for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) {
+ setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom);
+ }
+ for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
+ setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
+ setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
+ }
+
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
}
// This block controls legalization of the mask vector sizes that are
@@ -1619,6 +1652,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
+ addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
@@ -1645,14 +1679,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
}
- setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom);
setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
@@ -1664,7 +1700,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
@@ -1799,15 +1834,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
if (Subtarget.hasDQI()) {
- setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
-
+ for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
+ ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
+ ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
+ setOperationAction(Opc, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
@@ -1831,7 +1861,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
- MVT::v16f32, MVT::v8f64 }) {
+ MVT::v32f16, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
@@ -1842,6 +1872,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
}
+ setF16Action(MVT::v32f16, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
+ for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) {
+ setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
+ setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32);
+ }
for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::MLOAD, VT, Legal);
@@ -1881,23 +1920,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
- Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
- Subtarget.hasVLX() ? Legal : Custom);
if (Subtarget.hasDQI()) {
// Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
@@ -1934,25 +1959,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MSCATTER, VT, Custom);
if (Subtarget.hasDQI()) {
- for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
- setOperationAction(ISD::SINT_TO_FP, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::UINT_TO_FP, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::FP_TO_SINT, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::FP_TO_UINT, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
- Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::MUL, VT, Legal);
+ for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
+ ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
+ ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) {
+ setOperationAction(Opc, MVT::v2i64, Custom);
+ setOperationAction(Opc, MVT::v4i64, Custom);
}
+ setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i64, Legal);
}
if (Subtarget.hasCDI()) {
@@ -2052,7 +2066,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// AVX512_FP16 scalar operations
setGroup(MVT::f16);
- addRegisterClass(MVT::f16, &X86::FR16XRegClass);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
@@ -2066,6 +2079,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
@@ -2073,14 +2087,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.useAVX512Regs()) {
setGroup(MVT::v32f16);
- addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
@@ -2112,8 +2129,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasVLX()) {
- addRegisterClass(MVT::v8f16, &X86::VR128XRegClass);
- addRegisterClass(MVT::v16f16, &X86::VR256XRegClass);
setGroup(MVT::v8f16);
setGroup(MVT::v16f16);
@@ -2132,8 +2147,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
// INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
@@ -2347,7 +2366,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::FP16_TO_FP,
ISD::FP_EXTEND,
ISD::STRICT_FP_EXTEND,
- ISD::FP_ROUND});
+ ISD::FP_ROUND,
+ ISD::STRICT_FP_ROUND});
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -2404,6 +2424,10 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
return TypeSplitVector;
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
+ !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)
+ return TypeSplitVector;
+
+ if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
VT.getVectorElementType() != MVT::i1)
return TypeWidenVector;
@@ -2447,22 +2471,21 @@ handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512()) {
- unsigned NumElts = VT.getVectorNumElements();
+ if (VT.isVector()) {
+ if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
- MVT RegisterVT;
- unsigned NumRegisters;
- std::tie(RegisterVT, NumRegisters) =
- handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
- if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
- return RegisterVT;
- }
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return RegisterVT;
+ }
- // v3f16 will be widen to v4f16. But we don't assign register class for v4f16.
- // So its default register type is f16. We override the type to v8f16 here.
- if (VT == MVT::v3f16 && Subtarget.hasFP16())
- return MVT::v8f16;
+ if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
+ return MVT::v8f16;
+ }
// We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
@@ -2475,22 +2498,21 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
- Subtarget.hasAVX512()) {
- unsigned NumElts = VT.getVectorNumElements();
+ if (VT.isVector()) {
+ if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
+ unsigned NumElts = VT.getVectorNumElements();
- MVT RegisterVT;
- unsigned NumRegisters;
- std::tie(RegisterVT, NumRegisters) =
- handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
- if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
- return NumRegisters;
- }
+ MVT RegisterVT;
+ unsigned NumRegisters;
+ std::tie(RegisterVT, NumRegisters) =
+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return NumRegisters;
+ }
- // v3f16 will be widen to v4f16. But we don't assign register class for v4f16.
- // So its default register number is 3. We override the number to 1 here.
- if (VT == MVT::v3f16 && Subtarget.hasFP16())
- return 1;
+ if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
+ return 1;
+ }
// We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
// x87 is disabled.
@@ -9646,13 +9668,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
- // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
+ // Splat f16, f32, i32, v4f64, v4i64 in all cases with AVX2.
// For size optimization, also splat v2f64 and v2i64, and for size opt
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
if (ScalarSize == 32 ||
(ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) ||
- (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) ||
+ CVT == MVT::f16 ||
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
@@ -14129,6 +14151,16 @@ static bool isShuffleFoldableLoad(SDValue V) {
ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode());
}
+template<typename T>
+static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) {
+ return VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16();
+}
+
+template<typename T>
+bool X86TargetLowering::isSoftFP16(T VT) const {
+ return ::isSoftFP16(VT, Subtarget);
+}
+
/// Try to lower insertion of a single element into a zero vector.
///
/// This is a common pattern that we have especially efficient patterns to lower
@@ -14140,6 +14172,9 @@ static SDValue lowerShuffleAsElementInsertion(
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
+ if (isSoftFP16(EltVT, Subtarget))
+ return SDValue();
+
int V2Index =
find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
Mask.begin();
@@ -19444,6 +19479,15 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+ if (isSoftFP16(VT)) {
+ MVT NVT = VT.changeVectorElementTypeToInteger();
+ return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond,
+ DAG.getBitcast(NVT, LHS),
+ DAG.getBitcast(NVT, RHS)));
+ }
+
// A vselect where all conditions and data are constants can be optimized into
// a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
@@ -19467,8 +19511,6 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget.hasSSE41())
return SDValue();
- SDLoc dl(Op);
- MVT VT = Op.getSimpleValueType();
unsigned EltSize = VT.getScalarSizeInBits();
unsigned NumElts = VT.getVectorNumElements();
@@ -20856,16 +20898,6 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
return Cvt;
}
-template<typename T>
-static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) {
- return VT == MVT::f16 && !Subtarget.hasFP16();
-}
-
-template<typename T>
-bool X86TargetLowering::isSoftFP16(T VT) const {
- return ::isSoftFP16(VT, Subtarget);
-}
-
static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
bool IsStrict = Op->isStrictFPOpcode();
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
@@ -20885,6 +20917,26 @@ static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd);
}
+static bool isLegalConversion(MVT VT, bool IsSigned,
+ const X86Subtarget &Subtarget) {
+ if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned)
+ return true;
+ if (VT == MVT::v8i32 && Subtarget.hasAVX() && IsSigned)
+ return true;
+ if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32))
+ return true;
+ if (Subtarget.useAVX512Regs()) {
+ if (VT == MVT::v16i32)
+ return true;
+ if (VT == MVT::v8i64 && Subtarget.hasDQI())
+ return true;
+ }
+ if (Subtarget.hasDQI() && Subtarget.hasVLX() &&
+ (VT == MVT::v2i64 || VT == MVT::v4i64))
+ return true;
+ return false;
+}
+
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
@@ -20897,6 +20949,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
if (isSoftFP16(VT))
return promoteXINT_TO_FP(Op, DAG);
+ else if (isLegalConversion(SrcVT, true, Subtarget))
+ return Op;
if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
return LowerWin64_INT128_TO_FP(Op, DAG);
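(For reference, the feature gating encoded by isLegalConversion above can be summarized as a small predicate over subtarget flags. The sketch below is a standalone model for illustration only; the Features struct, the VT enum, and the checks in main() are hypothetical stand-ins that merely mirror the conditions in the function added in this hunk.)

#include <cassert>

// Hypothetical stand-in for the X86 subtarget feature queries used above.
struct Features {
  bool SSE2, AVX, VLX, AVX512Regs, DQI;
};

enum class VT { v4i32, v8i32, v16i32, v2i64, v4i64, v8i64, Other };

// Mirrors isLegalConversion(): which int<->fp conversions can be returned
// unchanged instead of falling through to the custom lowering paths.
static bool isLegalConversion(VT T, bool IsSigned, const Features &F) {
  if (T == VT::v4i32 && F.SSE2 && IsSigned)
    return true;
  if (T == VT::v8i32 && F.AVX && IsSigned)
    return true;
  if (F.VLX && (T == VT::v4i32 || T == VT::v8i32))
    return true;
  if (F.AVX512Regs) {
    if (T == VT::v16i32)
      return true;
    if (T == VT::v8i64 && F.DQI)
      return true;
  }
  if (F.DQI && F.VLX && (T == VT::v2i64 || T == VT::v4i64))
    return true;
  return false;
}

int main() {
  Features SSE2Only{true, false, false, false, false};
  assert(isLegalConversion(VT::v4i32, /*IsSigned=*/true, SSE2Only));
  assert(!isLegalConversion(VT::v4i32, /*IsSigned=*/false, SSE2Only));

  Features Avx512WithDQI{true, true, true, true, true};
  assert(isLegalConversion(VT::v8i64, /*IsSigned=*/false, Avx512WithDQI));
  return 0;
}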
@@ -21400,6 +21454,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (isSoftFP16(DstVT))
return promoteXINT_TO_FP(Op, DAG);
+ else if (isLegalConversion(SrcVT, false, Subtarget))
+ return Op;
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
@@ -22229,6 +22285,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
{NVT, MVT::Other}, {Chain, Src})});
return DAG.getNode(Op.getOpcode(), dl, VT,
DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src));
+ } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) {
+ return Op;
}
if (VT.isVector()) {
@@ -22826,7 +22884,7 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
return Op;
if (SVT.getVectorElementType() == MVT::f16) {
- assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
+ assert(Subtarget.hasF16C() && "Unexpected features!");
if (SVT == MVT::v2f16)
In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
DAG.getUNDEF(MVT::v2f16));
@@ -22836,6 +22894,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
{Op->getOperand(0), Res});
return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
+ } else if (VT == MVT::v4f64 || VT == MVT::v8f64) {
+ return Op;
}
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -22854,34 +22914,19 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
- SDValue Op2 = Op.getOperand(IsStrict ? 2 : 1);
MVT VT = Op.getSimpleValueType();
MVT SVT = In.getSimpleValueType();
if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80))
return SDValue();
- if (VT == MVT::f16) {
- if (Subtarget.hasFP16())
- return Op;
-
- if (SVT != MVT::f32) {
- if (IsStrict)
- return DAG.getNode(
- ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
- {Chain,
- DAG.getNode(ISD::STRICT_FP_ROUND, DL, {MVT::f32, MVT::Other},
- {Chain, In, Op2}),
- Op2});
-
- return DAG.getNode(ISD::FP_ROUND, DL, VT,
- DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, In, Op2),
- Op2);
- }
-
- if (!Subtarget.hasF16C())
+ if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {
+ if (!Subtarget.hasF16C() || SVT.getScalarType() != MVT::f32)
return SDValue();
+ if (VT.isVector())
+ return Op;
+
SDValue Res;
SDValue Rnd = DAG.getTargetConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, DL,
MVT::i32);
@@ -24176,10 +24221,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
if (isFP) {
-#ifndef NDEBUG
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64);
-#endif
+ if (isSoftFP16(EltVT, Subtarget))
+ return SDValue();
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
@@ -24741,6 +24786,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC =
cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
+ if (isSoftFP16(Op0.getValueType()))
+ return SDValue();
+
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets handled by emitFlagsForSetcc.
if (Op0.getValueType() == MVT::f128) {
@@ -24931,10 +24979,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op1.getSimpleValueType();
SDValue CC;
- if (isSoftFP16(VT))
- return DAG.getBitcast(MVT::f16, DAG.getNode(ISD::SELECT, DL, MVT::i16, Cond,
- DAG.getBitcast(MVT::i16, Op1),
- DAG.getBitcast(MVT::i16, Op2)));
+ if (isSoftFP16(VT)) {
+ MVT NVT = VT.changeTypeToInteger();
+ return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond,
+ DAG.getBitcast(NVT, Op1),
+ DAG.getBitcast(NVT, Op2)));
+ }
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available or VBLENDV if AVX is available.
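(The soft-FP16 path above lowers SELECT on half values by reinterpreting them as same-sized integers, selecting those, and bitcasting back, so no FP16 arithmetic is required. A minimal standalone analogue, using uint16_t to stand in for the f16 bit pattern, is sketched below; it illustrates the bitcast trick only, not the SelectionDAG API.)

#include <cstdint>
#include <cstdio>

// Stand-in for an IEEE half value: only its 16-bit pattern is ever touched.
using HalfBits = uint16_t;

// Select between two half values without any f16 operations: the select is
// performed on the raw 16-bit patterns, mirroring the bitcast-to-integer
// lowering used for soft FP16 above.
static HalfBits selectHalf(bool Cond, HalfBits A, HalfBits B) {
  return Cond ? A : B;
}

int main() {
  HalfBits One = 0x3C00; // 1.0 in IEEE half
  HalfBits Two = 0x4000; // 2.0 in IEEE half
  std::printf("selected bits: 0x%04X\n", selectHalf(true, One, Two));
  return 0;
}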
@@ -27268,27 +27318,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
- case Intrinsic::swift_async_context_addr: {
- auto &MF = DAG.getMachineFunction();
- auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit()) {
- MF.getFrameInfo().setFrameAddressIsTaken(true);
- X86FI->setHasSwiftAsyncContext(true);
- return SDValue(
- DAG.getMachineNode(
- X86::SUB64ri8, dl, MVT::i64,
- DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64),
- DAG.getTargetConstant(8, dl, MVT::i32)),
- 0);
- } else {
- // 32-bit so no special extended frame, create or reuse an existing stack
- // slot.
- if (!X86FI->getSwiftAsyncContextFrameIdx())
- X86FI->setSwiftAsyncContextFrameIdx(
- MF.getFrameInfo().CreateStackObject(4, Align(4), false));
- return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
- }
- }
case Intrinsic::x86_avx512_vp2intersect_q_512:
case Intrinsic::x86_avx512_vp2intersect_q_256:
case Intrinsic::x86_avx512_vp2intersect_q_128:
@@ -27668,6 +27697,37 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
switch (IntNo) {
+
+ case Intrinsic::swift_async_context_addr: {
+ SDLoc dl(Op);
+ auto &MF = DAG.getMachineFunction();
+ auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ if (Subtarget.is64Bit()) {
+ MF.getFrameInfo().setFrameAddressIsTaken(true);
+ X86FI->setHasSwiftAsyncContext(true);
+ SDValue Chain = Op->getOperand(0);
+ SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64);
+ SDValue Result =
+ SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP,
+ DAG.getTargetConstant(8, dl, MVT::i32)),
+ 0);
+ // Return { result, chain }.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+ CopyRBP.getValue(1));
+ } else {
+ // 32-bit so no special extended frame, create or reuse an existing
+ // stack slot.
+ if (!X86FI->getSwiftAsyncContextFrameIdx())
+ X86FI->setSwiftAsyncContextFrameIdx(
+ MF.getFrameInfo().CreateStackObject(4, Align(4), false));
+ SDValue Result =
+ DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
+ // Return { result, chain }.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+ Op->getOperand(0));
+ }
+ }
+
case llvm::Intrinsic::x86_seh_ehregnode:
return MarkEHRegistrationNode(Op, DAG);
case llvm::Intrinsic::x86_seh_ehguard:
@@ -32901,20 +32961,39 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: {
bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Rnd = N->getOperand(IsStrict ? 2 : 1);
+ EVT SrcVT = Src.getValueType();
EVT VT = N->getValueType(0);
- EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
+ SDValue V;
if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
: DAG.getUNDEF(MVT::v2f32);
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext);
}
+ if (!Subtarget.hasFP16() && VT.getVectorElementType() == MVT::f16) {
+ assert(Subtarget.hasF16C() && "Cannot widen f16 without F16C");
+ if (SrcVT.getVectorElementType() != MVT::f32)
+ return;
+
+ if (IsStrict)
+ V = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other},
+ {Chain, Src, Rnd});
+ else
+ V = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Src, Rnd);
+
+ Results.push_back(DAG.getBitcast(MVT::v8f16, V));
+ if (IsStrict)
+ Results.push_back(V.getValue(1));
+ return;
+ }
if (!isTypeLegal(Src.getValueType()))
return;
- SDValue V;
+ EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
if (IsStrict)
V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other},
- {N->getOperand(0), Src});
+ {Chain, Src});
else
V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src);
Results.push_back(V);
@@ -37342,6 +37421,7 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
bool IsUnary) {
unsigned NumMaskElts = Mask.size();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
+ unsigned SizeInBits = MaskVT.getSizeInBits();
if (MaskVT.is128BitVector()) {
if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) &&
@@ -37409,7 +37489,10 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Attempt to match against a OR if we're performing a blend shuffle and the
// non-blended source element is zero in each case.
- if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
+  // TODO: Handle cases where the V1/V2 sizes don't match SizeInBits.
+ if (SizeInBits == V1.getValueSizeInBits() &&
+ SizeInBits == V2.getValueSizeInBits() &&
+ (EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
(EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
bool IsBlend = true;
unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
@@ -39652,11 +39735,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
SmallVector<int, 4> Mask;
unsigned Opcode = N.getOpcode();
- // FIXME: Remove this after we support vector FP16
- if (isSoftFP16(peekThroughBitcasts(N.getOperand(0)).getSimpleValueType(),
- Subtarget))
- return SDValue();
-
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
return R;
@@ -40947,12 +41025,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
EltBits)) {
OpBits.clearAllBits();
OpElts.clearAllBits();
- for (int I = 0; I != NumElts; ++I)
- if (DemandedElts[I] && ((Invert && !EltBits[I].isAllOnes()) ||
- (!Invert && !EltBits[I].isZero()))) {
+ for (int I = 0; I != NumElts; ++I) {
+ if (!DemandedElts[I])
+ continue;
+ if (UndefElts[I]) {
+ // We can't assume an undef src element gives an undef dst - the
+ // other src might be zero.
+ OpBits.setAllBits();
+ OpElts.setBit(I);
+ } else if ((Invert && !EltBits[I].isAllOnes()) ||
+ (!Invert && !EltBits[I].isZero())) {
OpBits |= Invert ? ~EltBits[I] : EltBits[I];
OpElts.setBit(I);
}
+ }
}
return std::make_pair(OpBits, OpElts);
};
@@ -44715,7 +44801,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
// Early exit check
- if (!TLI.isTypeLegal(VT))
+ if (!TLI.isTypeLegal(VT) || isSoftFP16(VT, Subtarget))
return SDValue();
if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
@@ -47798,11 +47884,17 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
EltBits)) {
DemandedBits.clearAllBits();
DemandedElts.clearAllBits();
- for (int I = 0; I != NumElts; ++I)
- if (!EltBits[I].isZero()) {
+ for (int I = 0; I != NumElts; ++I) {
+ if (UndefElts[I]) {
+ // We can't assume an undef src element gives an undef dst - the
+ // other src might be zero.
+ DemandedBits.setAllBits();
+ DemandedElts.setBit(I);
+ } else if (!EltBits[I].isZero()) {
DemandedBits |= EltBits[I];
DemandedElts.setBit(I);
}
+ }
}
return std::make_pair(DemandedBits, DemandedElts);
};
@@ -51042,6 +51134,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
MVT VT = N->getSimpleValueType(0);
+ int NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
// ANDNP(undef, x) -> 0
// ANDNP(x, undef) -> 0
@@ -51060,6 +51154,19 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
if (SDValue Not = IsNOT(N0, DAG))
return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
+ // Constant Folding
+ APInt Undefs0, Undefs1;
+ SmallVector<APInt> EltBits0, EltBits1;
+ if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0) &&
+ getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) {
+ SDLoc DL(N);
+ SmallVector<APInt> ResultBits;
+ for (int I = 0; I != NumElts; ++I)
+ ResultBits.push_back(~EltBits0[I] & EltBits1[I]);
+ APInt ResultUndefs = APInt::getZero(NumElts);
+ return getConstVector(ResultBits, ResultUndefs, VT, DAG, DL);
+ }
+
// TODO: Constant fold NOT(N0) to allow us to use AND.
// TODO: Do this in IsNOT with suitable oneuse checks?
@@ -51074,20 +51181,24 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) {
APInt UndefElts;
SmallVector<APInt> EltBits;
- int NumElts = VT.getVectorNumElements();
- int EltSizeInBits = VT.getScalarSizeInBits();
APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
APInt DemandedElts = APInt::getAllOnes(NumElts);
if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts,
EltBits)) {
DemandedBits.clearAllBits();
DemandedElts.clearAllBits();
- for (int I = 0; I != NumElts; ++I)
- if ((Invert && !EltBits[I].isAllOnes()) ||
- (!Invert && !EltBits[I].isZero())) {
+ for (int I = 0; I != NumElts; ++I) {
+ if (UndefElts[I]) {
+ // We can't assume an undef src element gives an undef dst - the
+ // other src might be zero.
+ DemandedBits.setAllBits();
+ DemandedElts.setBit(I);
+ } else if ((Invert && !EltBits[I].isAllOnes()) ||
+ (!Invert && !EltBits[I].isZero())) {
DemandedBits |= Invert ? ~EltBits[I] : EltBits[I];
DemandedElts.setBit(I);
}
+ }
}
return std::make_pair(DemandedBits, DemandedElts);
};
@@ -54714,8 +54825,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasFP16())
return SDValue();
+ bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
- SDValue Src = N->getOperand(0);
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
@@ -54736,8 +54848,15 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
// Destination is v8i16 with at least 8 elements.
EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
std::max(8U, NumElts));
- SDValue Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src,
- DAG.getTargetConstant(4, dl, MVT::i32));
+ SDValue Cvt, Chain;
+ SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32);
+ if (IsStrict) {
+ Cvt = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {CvtVT, MVT::Other},
+ {N->getOperand(0), Src, Rnd});
+ Chain = Cvt.getValue(1);
+ } else {
+ Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src, Rnd);
+ }
// Extract down to real number of elements.
if (NumElts < 8) {
@@ -54746,7 +54865,12 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, dl));
}
- return DAG.getBitcast(VT, Cvt);
+ Cvt = DAG.getBitcast(VT, Cvt);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Cvt, Chain}, dl);
+
+ return Cvt;
}
static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) {
@@ -54954,6 +55078,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget);
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget);
case X86ISD::VBROADCAST_LOAD:
case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 48da7b3ac882..c105bde78ad1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3769,12 +3769,16 @@ let Predicates = [HasAVX512] in {
(VMOVDQA64Zrm addr:$src)>;
def : Pat<(alignedloadv32i16 addr:$src),
(VMOVDQA64Zrm addr:$src)>;
+ def : Pat<(alignedloadv32f16 addr:$src),
+ (VMOVAPSZrm addr:$src)>;
def : Pat<(alignedloadv64i8 addr:$src),
(VMOVDQA64Zrm addr:$src)>;
def : Pat<(loadv16i32 addr:$src),
(VMOVDQU64Zrm addr:$src)>;
def : Pat<(loadv32i16 addr:$src),
(VMOVDQU64Zrm addr:$src)>;
+ def : Pat<(loadv32f16 addr:$src),
+ (VMOVUPSZrm addr:$src)>;
def : Pat<(loadv64i8 addr:$src),
(VMOVDQU64Zrm addr:$src)>;
@@ -3783,12 +3787,16 @@ let Predicates = [HasAVX512] in {
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
+ def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
+ (VMOVAPSZmr addr:$dst, VR512:$src)>;
def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
(VMOVDQA64Zmr addr:$dst, VR512:$src)>;
def : Pat<(store (v16i32 VR512:$src), addr:$dst),
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
def : Pat<(store (v32i16 VR512:$src), addr:$dst),
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
+ def : Pat<(store (v32f16 VR512:$src), addr:$dst),
+ (VMOVUPSZmr addr:$dst, VR512:$src)>;
def : Pat<(store (v64i8 VR512:$src), addr:$dst),
(VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
@@ -3799,12 +3807,16 @@ let Predicates = [HasVLX] in {
(VMOVDQA64Z128rm addr:$src)>;
def : Pat<(alignedloadv8i16 addr:$src),
(VMOVDQA64Z128rm addr:$src)>;
+ def : Pat<(alignedloadv8f16 addr:$src),
+ (VMOVAPSZ128rm addr:$src)>;
def : Pat<(alignedloadv16i8 addr:$src),
(VMOVDQA64Z128rm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
(VMOVDQU64Z128rm addr:$src)>;
def : Pat<(loadv8i16 addr:$src),
(VMOVDQU64Z128rm addr:$src)>;
+ def : Pat<(loadv8f16 addr:$src),
+ (VMOVUPSZ128rm addr:$src)>;
def : Pat<(loadv16i8 addr:$src),
(VMOVDQU64Z128rm addr:$src)>;
@@ -3813,12 +3825,16 @@ let Predicates = [HasVLX] in {
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
(VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
(VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
@@ -3827,12 +3843,16 @@ let Predicates = [HasVLX] in {
(VMOVDQA64Z256rm addr:$src)>;
def : Pat<(alignedloadv16i16 addr:$src),
(VMOVDQA64Z256rm addr:$src)>;
+ def : Pat<(alignedloadv16f16 addr:$src),
+ (VMOVAPSZ256rm addr:$src)>;
def : Pat<(alignedloadv32i8 addr:$src),
(VMOVDQA64Z256rm addr:$src)>;
def : Pat<(loadv8i32 addr:$src),
(VMOVDQU64Z256rm addr:$src)>;
def : Pat<(loadv16i16 addr:$src),
(VMOVDQU64Z256rm addr:$src)>;
+ def : Pat<(loadv16f16 addr:$src),
+ (VMOVUPSZ256rm addr:$src)>;
def : Pat<(loadv32i8 addr:$src),
(VMOVDQU64Z256rm addr:$src)>;
@@ -3841,12 +3861,16 @@ let Predicates = [HasVLX] in {
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
+ (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
+ (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
@@ -3855,16 +3879,12 @@ let Predicates = [HasBWI] in {
(VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
(VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
- def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
- (VMOVAPSZrm addr:$src)>;
def : Pat<(v32f16 (vselect VK32WM:$mask,
(v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
def : Pat<(v32f16 (vselect VK32WM:$mask,
(v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
- def : Pat<(v32f16 (loadv32f16 addr:$src)),
- (VMOVUPSZrm addr:$src)>;
def : Pat<(v32f16 (vselect VK32WM:$mask,
(v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
(VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
@@ -3878,10 +3898,6 @@ let Predicates = [HasBWI] in {
def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
(VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
- def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
- (VMOVAPSZmr addr:$dst, VR512:$src)>;
- def : Pat<(store (v32f16 VR512:$src), addr:$dst),
- (VMOVUPSZmr addr:$dst, VR512:$src)>;
def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
(VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
@@ -3890,16 +3906,12 @@ let Predicates = [HasBWI, HasVLX] in {
(VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
(VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
- def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
- (VMOVAPSZ256rm addr:$src)>;
def : Pat<(v16f16 (vselect VK16WM:$mask,
(v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(v16f16 (vselect VK16WM:$mask,
(v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
- def : Pat<(v16f16 (loadv16f16 addr:$src)),
- (VMOVUPSZ256rm addr:$src)>;
def : Pat<(v16f16 (vselect VK16WM:$mask,
(v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
(VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
@@ -3913,10 +3925,6 @@ let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
- def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
- (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
- def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
- (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
(VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
@@ -3924,16 +3932,12 @@ let Predicates = [HasBWI, HasVLX] in {
(VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
(VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
- def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
- (VMOVAPSZ128rm addr:$src)>;
def : Pat<(v8f16 (vselect VK8WM:$mask,
(v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(v8f16 (vselect VK8WM:$mask,
(v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
- def : Pat<(v8f16 (loadv8f16 addr:$src)),
- (VMOVUPSZ128rm addr:$src)>;
def : Pat<(v8f16 (vselect VK8WM:$mask,
(v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
(VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
@@ -3947,10 +3951,6 @@ let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
(VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
- def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
- (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
- def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
- (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
(VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ec32ac2acad1..74ef831e1658 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -742,8 +742,8 @@ static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
return isPICBase;
}
-bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const {
+bool X86InstrInfo::isReallyTriviallyReMaterializable(
+ const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
// This function should only be called for opcodes with the ReMaterializable
@@ -869,7 +869,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
- MI.isDereferenceableInvariantLoad(AA)) {
+ MI.isDereferenceableInvariantLoad()) {
Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
@@ -3892,6 +3892,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
+ "Load size exceeds stack slot");
if (RC->getID() == X86::TILERegClassID) {
unsigned Opc = X86::TILELOADD;
// tileloadd (%sp, %idx), %tmm
@@ -3913,8 +3917,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
FrameIdx);
} else {
- const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
(Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 4943d2152fd2..98da00c39bdb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -240,8 +240,7 @@ public:
unsigned isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const override;
- bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
- AAResults *AA) const override;
+ bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Register DestReg, unsigned SubIdx,
const MachineInstr &Orig,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 06cb280e860a..c5557bd5df4e 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -140,6 +140,7 @@ def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
let Predicates = [NoAVX512] in {
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v8f16 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
@@ -159,6 +160,7 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
let Predicates = [NoAVX512] in {
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v16f16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
@@ -572,6 +574,23 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+ def : Pat<(alignedloadv8f16 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv8f16 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8f16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedloadv16f16 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv16f16 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore (v16f16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16f16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
}
// Use movaps / movups for SSE integer load / store (one byte shorter).
@@ -613,6 +632,17 @@ let Predicates = [UseSSE1] in {
(MOVUPSmr addr:$dst, VR128:$src)>;
}
+let Predicates = [UseSSE2] in {
+ def : Pat<(alignedloadv8f16 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv8f16 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8f16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
@@ -3136,6 +3166,8 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVNTDQYmr addr:$dst, VR256:$src)>;
def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
(VMOVNTDQYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignednontemporalstore (v16f16 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
(VMOVNTDQYmr addr:$dst, VR256:$src)>;
@@ -3143,6 +3175,8 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVNTDQmr addr:$dst, VR128:$src)>;
def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
(VMOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
(VMOVNTDQmr addr:$dst, VR128:$src)>;
}
@@ -3152,6 +3186,8 @@ let Predicates = [UseSSE2] in {
(MOVNTDQmr addr:$dst, VR128:$src)>;
def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
(MOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
(MOVNTDQmr addr:$dst, VR128:$src)>;
}
@@ -3374,12 +3410,16 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDQArm addr:$src)>;
def : Pat<(alignedloadv8i16 addr:$src),
(VMOVDQArm addr:$src)>;
+ def : Pat<(alignedloadv8f16 addr:$src),
+ (VMOVDQArm addr:$src)>;
def : Pat<(alignedloadv16i8 addr:$src),
(VMOVDQArm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
(VMOVDQUrm addr:$src)>;
def : Pat<(loadv8i16 addr:$src),
(VMOVDQUrm addr:$src)>;
+ def : Pat<(loadv8f16 addr:$src),
+ (VMOVDQUrm addr:$src)>;
def : Pat<(loadv16i8 addr:$src),
(VMOVDQUrm addr:$src)>;
@@ -3387,12 +3427,16 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDQAmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
(VMOVDQAmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
+ (VMOVDQAmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
(VMOVDQAmr addr:$dst, VR128:$src)>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
(VMOVDQUmr addr:$dst, VR128:$src)>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
(VMOVDQUmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8f16 VR128:$src), addr:$dst),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(VMOVDQUmr addr:$dst, VR128:$src)>;
}
@@ -6431,6 +6475,8 @@ let Predicates = [HasAVX2, NoVLX] in {
(VMOVNTDQAYrm addr:$src)>;
def : Pat<(v16i16 (alignednontemporalload addr:$src)),
(VMOVNTDQAYrm addr:$src)>;
+ def : Pat<(v16f16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQAYrm addr:$src)>;
def : Pat<(v32i8 (alignednontemporalload addr:$src)),
(VMOVNTDQAYrm addr:$src)>;
}
@@ -6446,6 +6492,8 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVNTDQArm addr:$src)>;
def : Pat<(v8i16 (alignednontemporalload addr:$src)),
(VMOVNTDQArm addr:$src)>;
+ def : Pat<(v8f16 (alignednontemporalload addr:$src)),
+ (VMOVNTDQArm addr:$src)>;
def : Pat<(v16i8 (alignednontemporalload addr:$src)),
(VMOVNTDQArm addr:$src)>;
}
@@ -6461,6 +6509,8 @@ let Predicates = [UseSSE41] in {
(MOVNTDQArm addr:$src)>;
def : Pat<(v8i16 (alignednontemporalload addr:$src)),
(MOVNTDQArm addr:$src)>;
+ def : Pat<(v8f16 (alignednontemporalload addr:$src)),
+ (MOVNTDQArm addr:$src)>;
def : Pat<(v16i8 (alignednontemporalload addr:$src)),
(MOVNTDQArm addr:$src)>;
}
@@ -7050,6 +7100,8 @@ def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
+def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF128 addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF128 addr:$src)>;
}
@@ -7095,6 +7147,7 @@ let Predicates = [HasAVX1Only] in {
defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2F128", v16f16, loadv16f16>;
defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
}
@@ -7150,6 +7203,8 @@ let Predicates = [HasAVX1Only] in {
defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>;
defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>;
defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8f16, v16f16, loadv8f16, loadv16f16>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>;
defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>;
}
@@ -7189,6 +7244,8 @@ let Predicates = [HasAVX1Only] in {
defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>;
defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>;
defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
+ defm : vextract_lowering<"VEXTRACTF128", v16f16, v8f16>;
+ defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
}
@@ -7503,6 +7560,10 @@ def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0))
(VBLENDPSYrri VR256:$src1,
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v16f16 VR256:$src1), (v8f16 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
(VBLENDPSYrri VR256:$src1,
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
@@ -7517,6 +7578,9 @@ def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0
def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
(VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv16f16 addr:$src2), (v8f16 VR128:$src1), (iPTR 0)),
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
(VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
@@ -7759,6 +7823,8 @@ let Predicates = [HasAVX2] in {
defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>;
defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>;
defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2I128", v16f16, loadv16f16>;
+ defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>;
}
@@ -7781,6 +7847,8 @@ let Predicates = [HasAVX2, NoVLX] in {
defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>;
defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>;
defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8f16, v16f16, loadv8f16, loadv16f16>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>;
defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>;
}
@@ -7801,6 +7869,8 @@ let Predicates = [HasAVX2, NoVLX] in {
defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>;
defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>;
defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
+ defm : vextract_lowering<"VEXTRACTI128", v16f16, v8f16>;
+ defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>;
defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>;
}
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index f4e25e4194db..1de2a1725954 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -254,8 +254,12 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef CPU =
CPUAttr.isValid() ? CPUAttr.getValueAsString() : (StringRef)TargetCPU;
- StringRef TuneCPU =
- TuneAttr.isValid() ? TuneAttr.getValueAsString() : (StringRef)CPU;
+ // "x86-64" is a default target setting for many front ends. In these cases,
+ // they actually request for "generic" tuning unless the "tune-cpu" was
+ // specified.
+ StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString()
+ : CPU == "x86-64" ? "generic"
+ : (StringRef)CPU;
StringRef FS =
FSAttr.isValid() ? FSAttr.getValueAsString() : (StringRef)TargetFS;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index b36f8a3d06d0..b27aac9c4e93 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1297,29 +1297,6 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.first = NumOfDests * NumOfShufflesPerDest;
}
- static const CostTblEntry AVX512FP16ShuffleTbl[] = {
- {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw
- {TTI::SK_Broadcast, MVT::v8f16, 1}, // vpbroadcastw
-
- {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw
- {TTI::SK_Reverse, MVT::v16f16, 2}, // vpermw
- {TTI::SK_Reverse, MVT::v8f16, 1}, // vpshufb
-
- {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw
- {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // vpshufb
-
- {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v16f16, 2}, // vpermt2w
- {TTI::SK_PermuteTwoSrc, MVT::v8f16, 2} // vpermt2w
- };
-
- if (!ST->useSoftFloat() && ST->hasFP16())
- if (const auto *Entry =
- CostTableLookup(AVX512FP16ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
-
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
{TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
@@ -1339,17 +1316,22 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
static const CostTblEntry AVX512BWShuffleTbl[] = {
{TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
{TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
{TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw
+ {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw
{TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw
{TTI::SK_Reverse, MVT::v64i8, 2}, // pshufb + vshufi64x2
{TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw
{TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw
{TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16
{TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w
{TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w
{TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w
{TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1
@@ -1369,6 +1351,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq
{TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd
{TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
{TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb
{TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd
@@ -1376,6 +1359,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Reverse, MVT::v8i64, 1}, // vpermq
{TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd
{TTI::SK_Reverse, MVT::v32i16, 7}, // per mca
+ {TTI::SK_Reverse, MVT::v32f16, 7}, // per mca
{TTI::SK_Reverse, MVT::v64i8, 7}, // per mca
{TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd
@@ -1408,11 +1392,14 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// FIXME: This just applies the type legalization cost rules above
// assuming these completely split.
{TTI::SK_PermuteSingleSrc, MVT::v32i16, 14},
+ {TTI::SK_PermuteSingleSrc, MVT::v32f16, 14},
{TTI::SK_PermuteSingleSrc, MVT::v64i8, 14},
{TTI::SK_PermuteTwoSrc, MVT::v32i16, 42},
+ {TTI::SK_PermuteTwoSrc, MVT::v32f16, 42},
{TTI::SK_PermuteTwoSrc, MVT::v64i8, 42},
{TTI::SK_Select, MVT::v32i16, 1}, // vpternlogq
+ {TTI::SK_Select, MVT::v32f16, 1}, // vpternlogq
{TTI::SK_Select, MVT::v64i8, 1}, // vpternlogq
{TTI::SK_Select, MVT::v8f64, 1}, // vblendmpd
{TTI::SK_Select, MVT::v16f32, 1}, // vblendmps
@@ -1430,6 +1417,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq
{TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd
{TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw
{TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb
{TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd
@@ -1437,9 +1425,11 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq
{TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd
{TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb
+ {TTI::SK_Reverse, MVT::v16f16, 2}, // vperm2i128 + pshufb
{TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb
{TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb
+ {TTI::SK_Select, MVT::v16f16, 1}, // vpblendvb
{TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb
{TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd
@@ -1448,6 +1438,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd
{TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vperm2i128 + 2*vpshufb
+ // + vpblendvb
{TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 + 2*vpshufb
// + vpblendvb
@@ -1457,6 +1449,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd
{TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb
// + vpblendvb
+ {TTI::SK_PermuteTwoSrc, MVT::v16f16, 7}, // 2*vperm2i128 + 4*vpshufb
+ // + vpblendvb
{TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb
// + vpblendvb
};
@@ -1493,6 +1487,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd
{TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps
{TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128
+ {TTI::SK_Broadcast, MVT::v16f16, 3}, // vpshuflw + vpshufd + vinsertf128
{TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128
{TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd
@@ -1501,6 +1496,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps
{TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb
// + vinsertf128
+ {TTI::SK_Reverse, MVT::v16f16, 4}, // vextractf128 + 2*pshufb
+ // + vinsertf128
{TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb
// + vinsertf128
@@ -1509,6 +1506,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Select, MVT::v8i32, 1}, // vblendps
{TTI::SK_Select, MVT::v8f32, 1}, // vblendps
{TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor
+ {TTI::SK_Select, MVT::v16f16, 3}, // vpand + vpandn + vpor
{TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor
{TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd
@@ -1517,6 +1515,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
{TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16, 8}, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
{TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb
// + 2*por + vinsertf128
@@ -1526,6 +1526,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps
{TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb
// + 4*por + vinsertf128
+ {TTI::SK_PermuteTwoSrc, MVT::v16f16, 15}, // 2*vextractf128 + 8*pshufb
+ // + 4*por + vinsertf128
{TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb
// + 4*por + vinsertf128
};
@@ -1540,6 +1542,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Select, MVT::v4i32, 1}, // pblendw
{TTI::SK_Select, MVT::v4f32, 1}, // blendps
{TTI::SK_Select, MVT::v8i16, 1}, // pblendw
+ {TTI::SK_Select, MVT::v8f16, 1}, // pblendw
{TTI::SK_Select, MVT::v16i8, 1} // pblendvb
};
@@ -1549,18 +1552,23 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
static const CostTblEntry SSSE3ShuffleTbl[] = {
{TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Broadcast, MVT::v8f16, 1}, // pshufb
{TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb
{TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_Reverse, MVT::v8f16, 1}, // pshufb
{TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb
{TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_Select, MVT::v8f16, 3}, // 2*pshufb + por
{TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por
{TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // pshufb
{TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb
{TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por
+ {TTI::SK_PermuteTwoSrc, MVT::v8f16, 3}, // 2*pshufb + por
{TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por
};
@@ -1573,12 +1581,14 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd
{TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd
{TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd
+ {TTI::SK_Broadcast, MVT::v8f16, 2}, // pshuflw + pshufd
{TTI::SK_Broadcast, MVT::v16i8, 3}, // unpck + pshuflw + pshufd
{TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd
{TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd
{TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd
{TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd
+ {TTI::SK_Reverse, MVT::v8f16, 3}, // pshuflw + pshufhw + pshufd
{TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
@@ -1586,6 +1596,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Select, MVT::v2f64, 1}, // movsd
{TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps
{TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por
+ {TTI::SK_Select, MVT::v8f16, 3}, // pand + pandn + por
{TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por
{TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd
@@ -1593,6 +1604,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd
{TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw
// + pshufd/unpck
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, 5}, // 2*pshuflw + 2*pshufhw
+ // + pshufd/unpck
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + 2*packus
@@ -1600,6 +1613,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{ TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // shufpd
{ TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, // 2*{unpck,movsd,pshufd}
{ TTI::SK_PermuteTwoSrc, MVT::v8i16, 8 }, // blend+permute
+ { TTI::SK_PermuteTwoSrc, MVT::v8f16, 8 }, // blend+permute
{ TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute
};
@@ -5219,7 +5233,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
return true;
- if (ScalarTy->isHalfTy() && ST->hasBWI() && ST->hasFP16())
+ if (ScalarTy->isHalfTy() && ST->hasBWI())
return true;
if (!ScalarTy->isIntegerTy())
@@ -5674,8 +5688,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
EltTy->isIntegerTy(32) || EltTy->isPointerTy())
return true;
- if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) ||
- (!ST->useSoftFloat() && ST->hasFP16() && EltTy->isHalfTy()))
+ if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || EltTy->isHalfTy())
return HasBW;
return false;
};
diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 19ebcb3ea3e8..2fb06e29bf3b 100644
--- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -27,7 +27,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
-#include <algorithm> // std::sort
+#include <algorithm>
using namespace llvm;
diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td
index 9d969b040ef2..4af250e8ad73 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/Options.td
+++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td
@@ -48,3 +48,4 @@ def nodefaultlib: P<"nodefaultlib", "">;
def nodefaultlib_all: F<"nodefaultlib">;
def nologo : F<"nologo">;
def subsystem : P<"subsystem", "">;
+def verbose : F<"verbose">;
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index f7bbdcffd2ec..81b43a2ab2c2 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -112,8 +112,8 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) {
return coro::declaresIntrinsics(
M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr",
"llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon",
- "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace",
- "llvm.coro.async.resume"});
+ "llvm.coro.id.async", "llvm.coro.id.retcon.once",
+ "llvm.coro.async.size.replace", "llvm.coro.async.resume"});
}
PreservedAnalyses CoroCleanupPass::run(Module &M,
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 5557370c82ba..af35b45c2eaf 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -17,8 +17,6 @@
namespace llvm {
class CallGraph;
-class CallGraphSCC;
-class PassRegistry;
namespace coro {
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9c1b247cdb39..722a1c6ec0ce 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1961,6 +1961,13 @@ static coro::Shape splitCoroutine(Function &F,
return Shape;
}
+/// Remove calls to llvm.coro.end in the original function.
+static void removeCoroEnds(const coro::Shape &Shape) {
+ for (auto End : Shape.CoroEnds) {
+ replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
+ }
+}
+
static void updateCallGraphAfterCoroutineSplit(
LazyCallGraph::Node &N, const coro::Shape &Shape,
const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C,
@@ -1969,10 +1976,14 @@ static void updateCallGraphAfterCoroutineSplit(
if (!Shape.CoroBegin)
return;
- for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
- auto &Context = End->getContext();
- End->replaceAllUsesWith(ConstantInt::getFalse(Context));
- End->eraseFromParent();
+ if (Shape.ABI != coro::ABI::Switch)
+ removeCoroEnds(Shape);
+ else {
+ for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
+ auto &Context = End->getContext();
+ End->replaceAllUsesWith(ConstantInt::getFalse(Context));
+ End->eraseFromParent();
+ }
}
if (!Clones.empty()) {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index e5ff98e4f73f..37c773bd47d6 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -326,7 +326,7 @@ static bool getPotentialCopiesOfMemoryValue(
<< " (only exact: " << OnlyExact << ")\n";);
Value &Ptr = *I.getPointerOperand();
- SmallVector<Value *, 8> Objects;
+ SmallSetVector<Value *, 8> Objects;
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I,
UsedAssumedInformation)) {
LLVM_DEBUG(
@@ -343,6 +343,7 @@ static bool getPotentialCopiesOfMemoryValue(
const auto *TLI =
A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction());
+ LLVM_DEBUG(dbgs() << "Visit " << Objects.size() << " objects:\n");
for (Value *Obj : Objects) {
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
if (isa<UndefValue>(Obj))
@@ -352,8 +353,8 @@ static bool getPotentialCopiesOfMemoryValue(
// be OK. We do not try to optimize the latter.
if (!NullPointerIsDefined(I.getFunction(),
Ptr.getType()->getPointerAddressSpace()) &&
- A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) ==
- Obj)
+ A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation,
+ AA::Interprocedural) == Obj)
continue;
LLVM_DEBUG(
dbgs() << "Underlying object is a valid nullptr, giving up.\n";);
@@ -375,25 +376,37 @@ static bool getPotentialCopiesOfMemoryValue(
return false;
}
- if (IsLoad) {
- Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI);
- if (!InitialValue)
- return false;
- NewCopies.push_back(InitialValue);
- NewCopyOrigins.push_back(nullptr);
- }
+ bool NullOnly = true;
+ bool NullRequired = false;
+ auto CheckForNullOnlyAndUndef = [&](Optional<Value *> V, bool IsExact) {
+ if (!V || *V == nullptr)
+ NullOnly = false;
+ else if (isa<UndefValue>(*V))
+ /* No op */;
+ else if (isa<Constant>(*V) && cast<Constant>(*V)->isNullValue())
+ NullRequired = !IsExact;
+ else
+ NullOnly = false;
+ };
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead()))
return true;
if (IsLoad && Acc.isWrittenValueYetUndetermined())
return true;
- if (OnlyExact && !IsExact &&
+ CheckForNullOnlyAndUndef(Acc.getContent(), IsExact);
+ if (OnlyExact && !IsExact && !NullOnly &&
!isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) {
LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst()
<< ", abort!\n");
return false;
}
+ if (NullRequired && !NullOnly) {
+ LLVM_DEBUG(dbgs() << "Required all `null` accesses due to non exact "
+ "one, however found non-null one: "
+ << *Acc.getRemoteInst() << ", abort!\n");
+ return false;
+ }
if (IsLoad) {
assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
if (!Acc.isWrittenValueUnknown()) {
@@ -424,15 +437,36 @@ static bool getPotentialCopiesOfMemoryValue(
return true;
};
+ // If the value has been written to we don't need the initial value of the
+ // object.
+ bool HasBeenWrittenTo = false;
+
auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj),
DepClassTy::NONE);
- if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) {
+ if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess,
+ HasBeenWrittenTo)) {
LLVM_DEBUG(
dbgs()
<< "Failed to verify all interfering accesses for underlying object: "
<< *Obj << "\n");
return false;
}
+
+ if (IsLoad && !HasBeenWrittenTo) {
+ Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI);
+ if (!InitialValue)
+ return false;
+ CheckForNullOnlyAndUndef(InitialValue, /* IsExact */ true);
+ if (NullRequired && !NullOnly) {
+ LLVM_DEBUG(dbgs() << "Non exact access but initial value that is not "
+ "null or undef, abort!\n");
+ return false;
+ }
+
+ NewCopies.push_back(InitialValue);
+ NewCopyOrigins.push_back(nullptr);
+ }
+
PIs.push_back(&PI);
}
@@ -520,12 +554,21 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
<< " from " << FromI << " [GBCB: " << bool(GoBackwardsCB)
<< "]\n");
+ // TODO: If we can go arbitrarily backwards we will eventually reach an
+ // entry point that can reach ToI. Only once this takes a set of blocks
+ // through which we cannot go, or once we track internal functions not
+ // accessible from the outside, it makes sense to perform backwards analysis
+ // in the absence of a GoBackwardsCB.
+ if (!GoBackwardsCB) {
+ LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " << FromI
+ << " is not checked backwards, abort\n");
+ return true;
+ }
+
SmallPtrSet<const Instruction *, 8> Visited;
SmallVector<const Instruction *> Worklist;
Worklist.push_back(&FromI);
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
while (!Worklist.empty()) {
const Instruction *CurFromI = Worklist.pop_back_val();
if (!Visited.insert(CurFromI).second)
@@ -545,26 +588,13 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
<< *ToI << " [Intra]\n");
if (Result)
return true;
- if (NoRecurseAA.isAssumedNoRecurse())
- continue;
- }
-
- // TODO: If we can go arbitrarily backwards we will eventually reach an
- // entry point that can reach ToI. Only once this takes a set of blocks
- // through which we cannot go, or once we track internal functions not
- // accessible from the outside, it makes sense to perform backwards analysis
- // in the absence of a GoBackwardsCB.
- if (!GoBackwardsCB) {
- LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from "
- << *CurFromI << " is not checked backwards, abort\n");
- return true;
}
// Check if the current instruction is already known to reach the ToFn.
const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>(
QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
bool Result = FnReachabilityAA.instructionCanReach(
- A, *CurFromI, ToFn, /* UseBackwards */ false);
+ A, *CurFromI, ToFn);
LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName()
<< " " << (Result ? "can potentially " : "cannot ")
<< "reach @" << ToFn.getName() << " [FromFn]\n");
@@ -1038,60 +1068,74 @@ Attributor::getAssumedConstant(const IRPosition &IRP,
}
if (auto *C = dyn_cast<Constant>(&IRP.getAssociatedValue()))
return C;
- const auto &ValueSimplifyAA =
- getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE);
- Optional<Value *> SimplifiedV =
- ValueSimplifyAA.getAssumedSimplifiedValue(*this);
- bool IsKnown = ValueSimplifyAA.isAtFixpoint();
- UsedAssumedInformation |= !IsKnown;
- if (!SimplifiedV) {
- recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
- return llvm::None;
- }
- if (isa_and_nonnull<UndefValue>(SimplifiedV.value())) {
- recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
- return UndefValue::get(IRP.getAssociatedType());
+ SmallVector<AA::ValueAndContext> Values;
+ if (getAssumedSimplifiedValues(IRP, &AA, Values,
+ AA::ValueScope::Interprocedural,
+ UsedAssumedInformation)) {
+ if (Values.empty())
+ return llvm::None;
+ if (auto *C = dyn_cast_or_null<Constant>(
+ AAPotentialValues::getSingleValue(*this, AA, IRP, Values)))
+ return C;
}
- Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.value());
- if (CI)
- CI = dyn_cast_or_null<Constant>(
- AA::getWithType(*CI, *IRP.getAssociatedType()));
- if (CI)
- recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
- return CI;
+ return nullptr;
}
-Optional<Value *>
-Attributor::getAssumedSimplified(const IRPosition &IRP,
- const AbstractAttribute *AA,
- bool &UsedAssumedInformation) {
+Optional<Value *> Attributor::getAssumedSimplified(const IRPosition &IRP,
+ const AbstractAttribute *AA,
+ bool &UsedAssumedInformation,
+ AA::ValueScope S) {
// First check all callbacks provided by outside AAs. If any of them returns
// a non-null value that is different from the associated value, or None, we
// assume it's simplified.
for (auto &CB : SimplificationCallbacks.lookup(IRP))
return CB(IRP, AA, UsedAssumedInformation);
- // If no high-level/outside simplification occurred, use AAValueSimplify.
- const auto &ValueSimplifyAA =
- getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE);
- Optional<Value *> SimplifiedV =
- ValueSimplifyAA.getAssumedSimplifiedValue(*this);
- bool IsKnown = ValueSimplifyAA.isAtFixpoint();
- UsedAssumedInformation |= !IsKnown;
- if (!SimplifiedV) {
- if (AA)
- recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL);
+ SmallVector<AA::ValueAndContext> Values;
+ if (!getAssumedSimplifiedValues(IRP, AA, Values, S, UsedAssumedInformation))
+ return &IRP.getAssociatedValue();
+ if (Values.empty())
return llvm::None;
+ if (AA)
+ if (Value *V = AAPotentialValues::getSingleValue(*this, *AA, IRP, Values))
+ return V;
+ if (IRP.getPositionKind() == IRPosition::IRP_RETURNED ||
+ IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_RETURNED)
+ return nullptr;
+ return &IRP.getAssociatedValue();
+}
+
+bool Attributor::getAssumedSimplifiedValues(
+ const IRPosition &IRP, const AbstractAttribute *AA,
+ SmallVectorImpl<AA::ValueAndContext> &Values, AA::ValueScope S,
+ bool &UsedAssumedInformation) {
+ // First check all callbacks provided by outside AAs. If any of them returns
+ // a non-null value that is different from the associated value, or None, we
+ // assume it's simplified.
+ const auto &SimplificationCBs = SimplificationCallbacks.lookup(IRP);
+ for (auto &CB : SimplificationCBs) {
+ Optional<Value *> CBResult = CB(IRP, AA, UsedAssumedInformation);
+ if (!CBResult.has_value())
+ continue;
+ Value *V = CBResult.value();
+ if (!V)
+ return false;
+ if ((S & AA::ValueScope::Interprocedural) ||
+ AA::isValidInScope(*V, IRP.getAnchorScope()))
+ Values.push_back(AA::ValueAndContext{*V, nullptr});
+ else
+ return false;
}
- if (*SimplifiedV == nullptr)
- return const_cast<Value *>(&IRP.getAssociatedValue());
- if (Value *SimpleV =
- AA::getWithType(**SimplifiedV, *IRP.getAssociatedType())) {
- if (AA)
- recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL);
- return SimpleV;
- }
- return const_cast<Value *>(&IRP.getAssociatedValue());
+ if (!SimplificationCBs.empty())
+ return true;
+
+ // If no high-level/outside simplification occurred, use AAPotentialValues.
+ const auto &PotentialValuesAA =
+ getOrCreateAAFor<AAPotentialValues>(IRP, AA, DepClassTy::OPTIONAL);
+ if (!PotentialValuesAA.getAssumedSimplifiedValues(*this, Values, S))
+ return false;
+ UsedAssumedInformation |= !PotentialValuesAA.isAtFixpoint();
+ return true;
}
Optional<Value *> Attributor::translateArgumentToCallSiteContent(
@@ -1106,7 +1150,7 @@ Optional<Value *> Attributor::translateArgumentToCallSiteContent(
if (!Arg->hasPointeeInMemoryValueAttr())
return getAssumedSimplified(
IRPosition::callsite_argument(CB, Arg->getArgNo()), AA,
- UsedAssumedInformation);
+ UsedAssumedInformation, AA::Intraprocedural);
return nullptr;
}
@@ -1295,8 +1339,21 @@ bool Attributor::checkForAllUses(
SmallVector<const Use *, 16> Worklist;
SmallPtrSet<const Use *, 16> Visited;
- for (const Use &U : V.uses())
- Worklist.push_back(&U);
+ auto AddUsers = [&](const Value &V, const Use *OldUse) {
+ for (const Use &UU : V.uses()) {
+ if (OldUse && EquivalentUseCB && !EquivalentUseCB(*OldUse, UU)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was "
+ "rejected by the equivalence call back: "
+ << *UU << "!\n");
+ return false;
+ }
+
+ Worklist.push_back(&UU);
+ }
+ return true;
+ };
+
+ AddUsers(V, /* OldUse */ nullptr);
LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size()
<< " initial uses to check\n");
@@ -1342,15 +1399,8 @@ bool Attributor::checkForAllUses(
<< PotentialCopies.size()
<< " potential copies instead!\n");
for (Value *PotentialCopy : PotentialCopies)
- for (const Use &CopyUse : PotentialCopy->uses()) {
- if (EquivalentUseCB && !EquivalentUseCB(*U, CopyUse)) {
- LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was "
- "rejected by the equivalence call back: "
- << *CopyUse << "!\n");
- return false;
- }
- Worklist.push_back(&CopyUse);
- }
+ if (!AddUsers(*PotentialCopy, U))
+ return false;
continue;
}
}
@@ -1361,8 +1411,25 @@ bool Attributor::checkForAllUses(
return false;
if (!Follow)
continue;
- for (const Use &UU : U->getUser()->uses())
- Worklist.push_back(&UU);
+
+ User &Usr = *U->getUser();
+ AddUsers(Usr, /* OldUse */ nullptr);
+
+ auto *RI = dyn_cast<ReturnInst>(&Usr);
+ if (!RI)
+ continue;
+
+ Function &F = *RI->getFunction();
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ return AddUsers(*ACS.getInstruction(), U);
+ };
+ if (!checkForAllCallSites(CallSitePred, F, /* RequireAllCallSites */ true,
+ &QueryingAA, UsedAssumedInformation)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Could not follow return instruction "
+ "to all call sites: "
+ << *RI << "\n");
+ return false;
+ }
}
return true;
@@ -1918,7 +1985,8 @@ ChangeStatus Attributor::cleanupIR() {
<< ToBeDeletedInsts.size() << " instructions and "
<< ToBeChangedValues.size() << " values and "
<< ToBeChangedUses.size() << " uses. To insert "
- << ToBeChangedToUnreachableInsts.size() << " unreachables."
+ << ToBeChangedToUnreachableInsts.size()
+ << " unreachables.\n"
<< "Preserve manifest added " << ManifestAddedBlocks.size()
<< " blocks\n");
@@ -2046,6 +2114,8 @@ ChangeStatus Attributor::cleanupIR() {
}
for (auto &V : ToBeChangedToUnreachableInsts)
if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ LLVM_DEBUG(dbgs() << "[Attributor] Change to unreachable: " << *I
+ << "\n");
assert(isRunOn(*I->getFunction()) &&
"Cannot replace an instruction outside the current SCC!");
CGModifiedFunctions.insert(I->getFunction());
@@ -2877,7 +2947,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function might be simplified.
bool UsedAssumedInformation = false;
- getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation);
+ getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation,
+ AA::Intraprocedural);
// Every returned value might be marked noundef.
getOrCreateAAFor<AANoUndef>(RetPos);
@@ -2906,7 +2977,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// interface though as outside AAs can register custom simplification
// callbacks.
bool UsedAssumedInformation = false;
- getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation);
+ getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation,
+ AA::Intraprocedural);
// Every argument might be dead.
getOrCreateAAFor<AAIsDead>(ArgPos);
@@ -2970,7 +3042,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
bool UsedAssumedInformation = false;
- getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation);
+ getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation,
+ AA::Intraprocedural);
}
for (int I = 0, E = CB.arg_size(); I < E; ++I) {
@@ -2984,7 +3057,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Attributor interface though as outside AAs can register custom
// simplification callbacks.
bool UsedAssumedInformation = false;
- getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation);
+ getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation,
+ AA::Intraprocedural);
// Every call site argument might be marked "noundef".
getOrCreateAAFor<AANoUndef>(CBArgPos);
@@ -3034,12 +3108,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
if (SimplifyAllLoads)
getAssumedSimplified(IRPosition::value(I), nullptr,
- UsedAssumedInformation);
+ UsedAssumedInformation, AA::Intraprocedural);
} else {
auto &SI = cast<StoreInst>(I);
getOrCreateAAFor<AAIsDead>(IRPosition::inst(I));
getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr,
- UsedAssumedInformation);
+ UsedAssumedInformation, AA::Intraprocedural);
getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand()));
}
return true;
@@ -3126,6 +3200,26 @@ raw_ostream &llvm::operator<<(raw_ostream &OS,
return OS;
}
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ const PotentialLLVMValuesState &S) {
+ OS << "set-state(< {";
+ if (!S.isValidState())
+ OS << "full-set";
+ else {
+ for (auto &It : S.getAssumedSet()) {
+ if (auto *F = dyn_cast<Function>(It.first.getValue()))
+ OS << "@" << F->getName() << "[" << int(It.second) << "], ";
+ else
+ OS << *It.first.getValue() << "[" << int(It.second) << "], ";
+ }
+ if (S.undefIsContained())
+ OS << "undef ";
+ }
+ OS << "} >)";
+
+ return OS;
+}
+
void AbstractAttribute::print(raw_ostream &OS) const {
OS << "[";
OS << getName();
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1ff54b78e27e..660ff3ee9563 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -14,12 +14,14 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
@@ -35,11 +37,13 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -72,6 +76,8 @@ static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
template <>
unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0;
+template <> unsigned llvm::PotentialLLVMValuesState::MaxPotentialValues = -1;
+
static cl::opt<unsigned, true> MaxPotentialValues(
"attributor-max-potential-values", cl::Hidden,
cl::desc("Maximum number of potential values to be "
@@ -79,6 +85,12 @@ static cl::opt<unsigned, true> MaxPotentialValues(
cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
cl::init(7));
+static cl::opt<int> MaxPotentialValuesIterations(
+ "attributor-max-potential-values-iterations", cl::Hidden,
+ cl::desc(
+ "Maximum number of iterations we keep dismantling potential values."),
+ cl::init(64));
+
static cl::opt<unsigned> MaxInterferingAccesses(
"attributor-max-interfering-accesses", cl::Hidden,
cl::desc("Maximum number of interfering accesses to "
@@ -162,6 +174,7 @@ PIPE_OPERATOR(AAValueConstantRange)
PIPE_OPERATOR(AAPrivatizablePtr)
PIPE_OPERATOR(AAUndefinedBehavior)
PIPE_OPERATOR(AAPotentialConstantValues)
+PIPE_OPERATOR(AAPotentialValues)
PIPE_OPERATOR(AANoUndef)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAFunctionReachability)
@@ -293,228 +306,35 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
return Ptr;
}
-/// Recursively visit all values that might become \p IRP at some point. This
-/// will be done by looking through cast instructions, selects, phis, and calls
-/// with the "returned" attribute. Once we cannot look through the value any
-/// further, the callback \p VisitValueCB is invoked and passed the current
-/// value, the \p State, and a flag to indicate if we stripped anything.
-/// Stripped means that we unpacked the value associated with \p IRP at least
-/// once. Note that the value used for the callback may still be the value
-/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
-/// we will never visit more values than specified by \p MaxValues.
-/// If \p VS does not contain the Interprocedural bit, only values valid in the
-/// scope of \p CtxI will be visited and simplification into other scopes is
-/// prevented.
-template <typename StateTy>
-static bool genericValueTraversal(
- Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
- StateTy &State,
- function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
- VisitValueCB,
- const Instruction *CtxI, bool &UsedAssumedInformation,
- bool UseValueSimplify = true, int MaxValues = 16,
- function_ref<Value *(Value *)> StripCB = nullptr,
- AA::ValueScope VS = AA::Interprocedural) {
-
- struct LivenessInfo {
- const AAIsDead *LivenessAA = nullptr;
- bool AnyDead = false;
- };
- SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs;
- auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & {
- LivenessInfo &LI = LivenessAAs[&F];
- if (!LI.LivenessAA)
- LI.LivenessAA = &A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F),
- DepClassTy::NONE);
- return LI;
- };
-
- Value *InitialV = &IRP.getAssociatedValue();
- using Item = std::pair<Value *, const Instruction *>;
- SmallSet<Item, 16> Visited;
- SmallVector<Item, 16> Worklist;
- Worklist.push_back({InitialV, CtxI});
-
- int Iteration = 0;
- do {
- Item I = Worklist.pop_back_val();
- Value *V = I.first;
- CtxI = I.second;
- if (StripCB)
- V = StripCB(V);
-
- // Check if we should process the current value. To prevent endless
- // recursion keep a record of the values we followed!
- if (!Visited.insert(I).second)
- continue;
-
- // Make sure we limit the compile time for complex expressions.
- if (Iteration++ >= MaxValues) {
- LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: "
- << Iteration << "!\n");
- return false;
- }
-
- // Explicitly look through calls with a "returned" attribute if we do
- // not have a pointer as stripPointerCasts only works on them.
- Value *NewV = nullptr;
- if (V->getType()->isPointerTy()) {
- NewV = V->stripPointerCasts();
- } else {
- auto *CB = dyn_cast<CallBase>(V);
- if (CB && CB->getCalledFunction()) {
- for (Argument &Arg : CB->getCalledFunction()->args())
- if (Arg.hasReturnedAttr()) {
- NewV = CB->getArgOperand(Arg.getArgNo());
- break;
- }
- }
- }
- if (NewV && NewV != V) {
- Worklist.push_back({NewV, CtxI});
- continue;
- }
-
- // Look through select instructions, visit assumed potential values.
- if (auto *SI = dyn_cast<SelectInst>(V)) {
- Optional<Constant *> C = A.getAssumedConstant(
- *SI->getCondition(), QueryingAA, UsedAssumedInformation);
- bool NoValueYet = !C;
- if (NoValueYet || isa_and_nonnull<UndefValue>(*C))
- continue;
- if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) {
- if (CI->isZero())
- Worklist.push_back({SI->getFalseValue(), CtxI});
- else
- Worklist.push_back({SI->getTrueValue(), CtxI});
- continue;
- }
- // We could not simplify the condition, assume both values.
- Worklist.push_back({SI->getTrueValue(), CtxI});
- Worklist.push_back({SI->getFalseValue(), CtxI});
- continue;
- }
-
- // Look through phi nodes, visit all live operands.
- if (auto *PHI = dyn_cast<PHINode>(V)) {
- LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction());
- for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
- BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
- LI.AnyDead = true;
- UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint();
- continue;
- }
- Worklist.push_back(
- {PHI->getIncomingValue(u), IncomingBB->getTerminator()});
- }
- continue;
- }
-
- if (auto *Arg = dyn_cast<Argument>(V)) {
- if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) {
- SmallVector<Item> CallSiteValues;
- bool UsedAssumedInformation = false;
- if (A.checkForAllCallSites(
- [&](AbstractCallSite ACS) {
- // Callbacks might not have a corresponding call site operand,
- // stick with the argument in that case.
- Value *CSOp = ACS.getCallArgOperand(*Arg);
- if (!CSOp)
- return false;
- CallSiteValues.push_back({CSOp, ACS.getInstruction()});
- return true;
- },
- *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) {
- Worklist.append(CallSiteValues);
- continue;
- }
- }
- }
-
- if (UseValueSimplify && !isa<Constant>(V)) {
- Optional<Value *> SimpleV =
- A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
- if (!SimpleV)
- continue;
- Value *NewV = SimpleV.value();
- if (NewV && NewV != V) {
- if ((VS & AA::Interprocedural) || !CtxI ||
- AA::isValidInScope(*NewV, CtxI->getFunction())) {
- Worklist.push_back({NewV, CtxI});
- continue;
- }
- }
- }
-
- if (auto *LI = dyn_cast<LoadInst>(V)) {
- bool UsedAssumedInformation = false;
- // If we ask for the potentially loaded values from the initial pointer we
- // will simply end up here again. The load is as far as we can make it.
- if (LI->getPointerOperand() != InitialV) {
- SmallSetVector<Value *, 4> PotentialCopies;
- SmallSetVector<Instruction *, 4> PotentialValueOrigins;
- if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies,
- PotentialValueOrigins, QueryingAA,
- UsedAssumedInformation,
- /* OnlyExact */ true)) {
-          // Values have to be dynamically unique or we lose the fact that a
- // single llvm::Value might represent two runtime values (e.g., stack
- // locations in different recursive calls).
- bool DynamicallyUnique =
- llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) {
- return AA::isDynamicallyUnique(A, QueryingAA, *PC);
- });
- if (DynamicallyUnique &&
- ((VS & AA::Interprocedural) || !CtxI ||
- llvm::all_of(PotentialCopies, [CtxI](Value *PC) {
- return AA::isValidInScope(*PC, CtxI->getFunction());
- }))) {
- for (auto *PotentialCopy : PotentialCopies)
- Worklist.push_back({PotentialCopy, CtxI});
- continue;
- }
- }
- }
- }
-
- // Once a leaf is reached we inform the user through the callback.
- if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
- LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
- << *V << "!\n");
- return false;
- }
- } while (!Worklist.empty());
-
-  // If we actually used liveness information we have to record a dependence.
- for (auto &It : LivenessAAs)
- if (It.second.AnyDead)
- A.recordDependence(*It.second.LivenessAA, QueryingAA,
- DepClassTy::OPTIONAL);
-
- // All values have been visited.
- return true;
-}
-
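// The genericValueTraversal body removed above is a standard bounded worklist
// walk. A minimal, self-contained sketch of that pattern follows, assuming toy
// integer values and a hypothetical successor table in place of llvm::Value
// and the Attributor callbacks; only the visited set and the iteration budget
// are illustrated here.

#include <cstdio>
#include <set>
#include <vector>

static bool traverseBounded(int Root, int MaxValues,
                            const std::vector<std::vector<int>> &Successors) {
  std::set<int> Visited;
  std::vector<int> Worklist{Root};
  int Iteration = 0;
  do {
    int V = Worklist.back();
    Worklist.pop_back();
    // Skip values we already followed to prevent endless recursion.
    if (!Visited.insert(V).second)
      continue;
    // Limit compile time for complex expressions.
    if (Iteration++ >= MaxValues)
      return false;
    for (int Succ : Successors[V])
      Worklist.push_back(Succ);
    std::printf("visited %d\n", V); // Stand-in for the visit callback.
  } while (!Worklist.empty());
  return true;
}

int main() {
  // Node 0 -> {1, 2}, 1 -> {2}, 2 -> {}.
  return traverseBounded(0, 16, {{1, 2}, {2}, {}}) ? 0 : 1;
}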
bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
- SmallVectorImpl<Value *> &Objects,
+ SmallSetVector<Value *, 8> &Objects,
const AbstractAttribute &QueryingAA,
const Instruction *CtxI,
bool &UsedAssumedInformation,
- AA::ValueScope VS) {
- auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
- SmallPtrSet<Value *, 8> SeenObjects;
- auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
- SmallVectorImpl<Value *> &Objects,
- bool) -> bool {
- if (SeenObjects.insert(&Val).second)
- Objects.push_back(&Val);
+ AA::ValueScope S,
+ SmallPtrSetImpl<Value *> *SeenObjects) {
+ SmallPtrSet<Value *, 8> LocalSeenObjects;
+ if (!SeenObjects)
+ SeenObjects = &LocalSeenObjects;
+
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(IRPosition::value(Ptr), &QueryingAA, Values,
+ S, UsedAssumedInformation)) {
+ Objects.insert(const_cast<Value *>(&Ptr));
return true;
- };
- if (!genericValueTraversal<decltype(Objects)>(
- A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
- UsedAssumedInformation, true, 32, StripCB, VS))
- return false;
+ }
+
+ for (auto &VAC : Values) {
+ Value *UO = getUnderlyingObject(VAC.getValue());
+ if (UO && UO != VAC.getValue() && SeenObjects->insert(UO).second) {
+ if (!getAssumedUnderlyingObjects(A, *UO, Objects, QueryingAA,
+ VAC.getCtxI(), UsedAssumedInformation, S,
+ SeenObjects))
+ return false;
+ continue;
+ }
+ Objects.insert(VAC.getValue());
+ }
return true;
}
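// The rewritten getAssumedUnderlyingObjects above recurses through simplified
// values and shares one seen-set across recursive calls so each base object is
// expanded at most once. A minimal sketch of that shape, assuming a toy
// "simplify" table and a toy "base object" table in place of
// Attributor::getAssumedSimplifiedValues and getUnderlyingObject:

#include <map>
#include <set>
#include <string>
#include <vector>

using ToyValue = std::string;

static void collectUnderlyingObjects(
    const ToyValue &Ptr,
    const std::map<ToyValue, std::vector<ToyValue>> &Simplified,
    const std::map<ToyValue, ToyValue> &BaseOf, std::set<ToyValue> &Objects,
    std::set<ToyValue> &Seen) {
  auto SIt = Simplified.find(Ptr);
  // No simplified values known: fall back to the pointer itself.
  if (SIt == Simplified.end()) {
    Objects.insert(Ptr);
    return;
  }
  for (const ToyValue &V : SIt->second) {
    auto BIt = BaseOf.find(V);
    // Recurse on a distinct, not-yet-seen base object.
    if (BIt != BaseOf.end() && BIt->second != V &&
        Seen.insert(BIt->second).second) {
      collectUnderlyingObjects(BIt->second, Simplified, BaseOf, Objects, Seen);
      continue;
    }
    Objects.insert(V);
  }
}

int main() {
  std::set<ToyValue> Objects, Seen;
  // "p" simplifies to "gep", whose base object is the allocation "alloca".
  collectUnderlyingObjects("p", {{"p", {"gep"}}}, {{"gep", "alloca"}}, Objects,
                           Seen);
  return Objects.count("alloca") ? 0 : 1;
}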
@@ -1122,9 +942,6 @@ struct AAPointerInfoImpl
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
AAPointerInfoImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override { AAPointerInfo::initialize(A); }
-
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return std::string("PointerInfo ") +
@@ -1144,9 +961,14 @@ struct AAPointerInfoImpl
const override {
return State::forallInterferingAccesses(OAS, CB);
}
- bool forallInterferingAccesses(
- Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
- function_ref<bool(const Access &, bool)> UserCB) const override {
+
+ bool
+ forallInterferingAccesses(Attributor &A, const AbstractAttribute &QueryingAA,
+ Instruction &I,
+ function_ref<bool(const Access &, bool)> UserCB,
+ bool &HasBeenWrittenTo) const override {
+ HasBeenWrittenTo = false;
+
SmallPtrSet<const Access *, 8> DominatingWrites;
SmallVector<std::pair<const Access *, bool>, 8> InterferingAccesses;
@@ -1182,14 +1004,12 @@ struct AAPointerInfoImpl
const bool FindInterferingWrites = I.mayReadFromMemory();
const bool FindInterferingReads = I.mayWriteToMemory();
- const bool UseDominanceReasoning = FindInterferingWrites;
+ const bool UseDominanceReasoning =
+ FindInterferingWrites && NoRecurseAA.isKnownNoRecurse();
const bool CanUseCFGResoning = CanIgnoreThreading(I);
InformationCache &InfoCache = A.getInfoCache();
const DominatorTree *DT =
- NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning
- ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
- Scope)
- : nullptr;
+ InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(Scope);
enum GPUAddressSpace : unsigned {
Generic = 0,
@@ -1246,22 +1066,17 @@ struct AAPointerInfoImpl
(!FindInterferingReads || !Acc.isRead()))
return true;
+ bool Dominates = DT && Exact && Acc.isMustAccess() &&
+ (Acc.getLocalInst()->getFunction() == &Scope) &&
+ DT->dominates(Acc.getRemoteInst(), &I);
+ if (FindInterferingWrites && Dominates)
+ HasBeenWrittenTo = true;
+
// For now we only filter accesses based on CFG reasoning which does not
// work yet if we have threading effects, or the access is complicated.
- if (CanUseCFGResoning) {
- if ((!Acc.isWrite() ||
- !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA,
- IsLiveInCalleeCB)) &&
- (!Acc.isRead() ||
- !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA,
- IsLiveInCalleeCB)))
- return true;
- if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) &&
- IsSameThreadAsLoad(Acc)) {
- if (DT->dominates(Acc.getLocalInst(), &I))
- DominatingWrites.insert(&Acc);
- }
- }
+ if (CanUseCFGResoning && Dominates && UseDominanceReasoning &&
+ IsSameThreadAsLoad(Acc))
+ DominatingWrites.insert(&Acc);
InterferingAccesses.push_back({&Acc, Exact});
return true;
@@ -1269,19 +1084,27 @@ struct AAPointerInfoImpl
if (!State::forallInterferingAccesses(I, AccessCB))
return false;
- // If we cannot use CFG reasoning we only filter the non-write accesses
- // and are done here.
- if (!CanUseCFGResoning) {
- for (auto &It : InterferingAccesses)
- if (!UserCB(*It.first, It.second))
- return false;
- return true;
+ if (HasBeenWrittenTo) {
+ const Function *ScopePtr = &Scope;
+ IsLiveInCalleeCB = [ScopePtr](const Function &Fn) {
+ return ScopePtr != &Fn;
+ };
}
// Helper to determine if we can skip a specific write access. This is in
// the worst case quadratic as we are looking for another write that will
// hide the effect of this one.
auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
+ if ((!Acc.isWrite() ||
+ !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA,
+ IsLiveInCalleeCB)) &&
+ (!Acc.isRead() ||
+ !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA,
+ IsLiveInCalleeCB)))
+ return true;
+
+ if (!DT || !UseDominanceReasoning)
+ return false;
if (!IsSameThreadAsLoad(Acc))
return false;
if (!DominatingWrites.count(&Acc))
@@ -1303,7 +1126,7 @@ struct AAPointerInfoImpl
// succeeded for all or not.
unsigned NumInterferingAccesses = InterferingAccesses.size();
for (auto &It : InterferingAccesses) {
- if (!DT || NumInterferingAccesses > MaxInterferingAccesses ||
+ if (NumInterferingAccesses > MaxInterferingAccesses ||
!CanSkipAccess(*It.first, It.second)) {
if (!UserCB(*It.first, It.second))
return false;
@@ -1339,8 +1162,9 @@ struct AAPointerInfoImpl
if (FromCallee) {
Content = A.translateArgumentToCallSiteContent(
RAcc.getContent(), CB, *this, UsedAssumedInformation);
- AK = AccessKind(
- AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE));
+ AK =
+ AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW));
+ AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST));
}
Changed =
Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content,
@@ -1353,6 +1177,27 @@ struct AAPointerInfoImpl
/// Statistic tracking for all AAPointerInfo implementations.
/// See AbstractAttribute::trackStatistics().
void trackPointerInfoStatistics(const IRPosition &IRP) const {}
+
+ /// Dump the state into \p O.
+ void dumpState(raw_ostream &O) {
+ for (auto &It : AccessBins) {
+ O << "[" << It.first.getOffset() << "-"
+ << It.first.getOffset() + It.first.getSize()
+ << "] : " << It.getSecond()->size() << "\n";
+ for (auto &Acc : *It.getSecond()) {
+ O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n";
+ if (Acc.getLocalInst() != Acc.getRemoteInst())
+ O << " --> " << *Acc.getRemoteInst()
+ << "\n";
+ if (!Acc.isWrittenValueYetUndetermined()) {
+ if (Acc.getWrittenValue())
+ O << " - c: " << *Acc.getWrittenValue() << "\n";
+ else
+ O << " - c: <unknown>\n";
+ }
+ }
+ }
+ }
};
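// The patch above folds a may/must bit into the access kind (AK_R/AK_W
// combined with AK_MAY/AK_MUST). A small standalone sketch of that flag
// composition; the enumerator names here are illustrative stand-ins, not the
// real AAPointerInfo::AccessKind values.

#include <cstdio>

enum ToyAccessKind : unsigned {
  TAK_R = 1u << 0,
  TAK_W = 1u << 1,
  TAK_MAY = 1u << 2,
  TAK_MUST = 1u << 3,
  TAK_MUST_READ = TAK_MUST | TAK_R,
  TAK_MUST_WRITE = TAK_MUST | TAK_W,
};

int main() {
  // A read through a pointer that only may be the associated value gets the
  // MAY bit instead of MUST, so later dominance reasoning stays conservative.
  ToyAccessKind AK = ToyAccessKind(TAK_R | TAK_MAY);
  std::printf("read=%d may=%d must=%d\n", (AK & TAK_R) != 0,
              (AK & TAK_MAY) != 0, (AK & TAK_MUST) != 0);
  return 0;
}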
struct AAPointerInfoFloating : public AAPointerInfoImpl {
@@ -1360,9 +1205,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
AAPointerInfoFloating(const IRPosition &IRP, Attributor &A)
: AAPointerInfoImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override { AAPointerInfoImpl::initialize(A); }
-
/// Deal with an access and signal if it was handled successfully.
bool handleAccess(Attributor &A, Instruction &I, Value &Ptr,
Optional<Value *> Content, AccessKind Kind, int64_t Offset,
@@ -1460,7 +1302,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
Follow = true;
return true;
}
- if (isa<CastInst>(Usr) || isa<SelectInst>(Usr))
+ if (isa<CastInst>(Usr) || isa<SelectInst>(Usr) || isa<ReturnInst>(Usr))
return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow);
// For PHIs we need to take care of the recurrence explicitly as the value
@@ -1469,6 +1311,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
if (isa<PHINode>(Usr)) {
// Note the order here, the Usr access might change the map, CurPtr is
// already in it though.
+ bool IsFirstPHIUser = !OffsetInfoMap.count(Usr);
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
OffsetInfo &PtrOI = OffsetInfoMap[CurPtr];
// Check if the PHI is invariant (so far).
@@ -1484,52 +1327,69 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
}
// Check if the PHI operand is not dependent on the PHI itself.
- // TODO: This is not great as we look at the pointer type. However, it
- // is unclear where the Offset size comes from with typeless pointers.
APInt Offset(
DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()),
0);
- if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
- DL, Offset, /* AllowNonInbounds */ true)) {
- if (Offset != PtrOI.Offset) {
- LLVM_DEBUG(dbgs()
- << "[AAPointerInfo] PHI operand pointer offset mismatch "
- << *CurPtr << " in " << *Usr << "\n");
- return false;
- }
- return HandlePassthroughUser(Usr, PtrOI, Follow);
+ Value *CurPtrBase = CurPtr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true);
+ auto It = OffsetInfoMap.find(CurPtrBase);
+ if (It != OffsetInfoMap.end()) {
+ Offset += It->getSecond().Offset;
+ if (IsFirstPHIUser || Offset == UsrOI.Offset)
+ return HandlePassthroughUser(Usr, PtrOI, Follow);
+ LLVM_DEBUG(dbgs()
+ << "[AAPointerInfo] PHI operand pointer offset mismatch "
+ << *CurPtr << " in " << *Usr << "\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
+ << *CurPtr << " in " << *Usr << "\n");
}
// TODO: Approximate in case we know the direction of the recurrence.
- LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
- << *CurPtr << " in " << *Usr << "\n");
UsrOI = PtrOI;
UsrOI.Offset = OffsetAndSize::Unknown;
Follow = true;
return true;
}
- if (auto *LoadI = dyn_cast<LoadInst>(Usr))
- return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
- AccessKind::AK_READ, OffsetInfoMap[CurPtr].Offset,
- Changed, LoadI->getType());
+ if (auto *LoadI = dyn_cast<LoadInst>(Usr)) {
+ // If the access is to a pointer that may or may not be the associated
+ // value, e.g. due to a PHI, we cannot assume it will be read.
+ AccessKind AK = AccessKind::AK_R;
+ if (getUnderlyingObject(CurPtr) == &AssociatedValue)
+ AK = AccessKind(AK | AccessKind::AK_MUST);
+ else
+ AK = AccessKind(AK | AccessKind::AK_MAY);
+ return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, AK,
+ OffsetInfoMap[CurPtr].Offset, Changed,
+ LoadI->getType());
+ }
+
if (auto *StoreI = dyn_cast<StoreInst>(Usr)) {
if (StoreI->getValueOperand() == CurPtr) {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Escaping use in store "
<< *StoreI << "\n");
return false;
}
+ // If the access is to a pointer that may or may not be the associated
+ // value, e.g. due to a PHI, we cannot assume it will be written.
+ AccessKind AK = AccessKind::AK_W;
+ if (getUnderlyingObject(CurPtr) == &AssociatedValue)
+ AK = AccessKind(AK | AccessKind::AK_MUST);
+ else
+ AK = AccessKind(AK | AccessKind::AK_MAY);
bool UsedAssumedInformation = false;
- Optional<Value *> Content = A.getAssumedSimplified(
- *StoreI->getValueOperand(), *this, UsedAssumedInformation);
- return handleAccess(A, *StoreI, *CurPtr, Content, AccessKind::AK_WRITE,
+ Optional<Value *> Content =
+ A.getAssumedSimplified(*StoreI->getValueOperand(), *this,
+ UsedAssumedInformation, AA::Interprocedural);
+ return handleAccess(A, *StoreI, *CurPtr, Content, AK,
OffsetInfoMap[CurPtr].Offset, Changed,
StoreI->getValueOperand()->getType());
}
if (auto *CB = dyn_cast<CallBase>(Usr)) {
if (CB->isLifetimeStartOrEnd())
return true;
- if (TLI && isFreeCall(CB, TLI))
+ if (getFreedOperand(CB, TLI) == U)
return true;
if (CB->isArgOperand(&U)) {
unsigned ArgNo = CB->getArgOperandNo(&U);
@@ -1539,7 +1399,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
Changed = translateAndAddState(A, CSArgPI,
OffsetInfoMap[CurPtr].Offset, *CB) |
Changed;
- return true;
+ return isValidState();
}
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB
<< "\n");
@@ -1551,36 +1411,30 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
return false;
};
auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) {
- if (OffsetInfoMap.count(NewU))
+ if (OffsetInfoMap.count(NewU)) {
+ LLVM_DEBUG({
+ if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) {
+ dbgs() << "[AAPointerInfo] Equivalent use callback failed: "
+ << OffsetInfoMap[NewU].Offset << " vs "
+ << OffsetInfoMap[OldU].Offset << "\n";
+ }
+ });
return OffsetInfoMap[NewU] == OffsetInfoMap[OldU];
+ }
OffsetInfoMap[NewU] = OffsetInfoMap[OldU];
return true;
};
if (!A.checkForAllUses(UsePred, *this, AssociatedValue,
/* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL,
- /* IgnoreDroppableUses */ true, EquivalentUseCB))
+ /* IgnoreDroppableUses */ true, EquivalentUseCB)) {
+ LLVM_DEBUG(
+ dbgs() << "[AAPointerInfo] Check for all uses failed, abort!\n");
return indicatePessimisticFixpoint();
+ }
LLVM_DEBUG({
dbgs() << "Accesses by bin after update:\n";
- for (auto &It : AccessBins) {
- dbgs() << "[" << It.first.getOffset() << "-"
- << It.first.getOffset() + It.first.getSize()
- << "] : " << It.getSecond()->size() << "\n";
- for (auto &Acc : *It.getSecond()) {
- dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst()
- << "\n";
- if (Acc.getLocalInst() != Acc.getRemoteInst())
- dbgs() << " --> "
- << *Acc.getRemoteInst() << "\n";
- if (!Acc.isWrittenValueYetUndetermined()) {
- if (Acc.getWrittenValue())
- dbgs() << " - c: " << *Acc.getWrittenValue() << "\n";
- else
- dbgs() << " - c: <unknown>\n";
- }
- }
- }
+ dumpState(dbgs());
});
return Changed;
@@ -1643,16 +1497,22 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
unsigned ArgNo = getIRPosition().getCallSiteArgNo();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (ArgNo == 0) {
- handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed,
- nullptr, LengthVal);
+ handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_WRITE, 0,
+ Changed, nullptr, LengthVal);
} else if (ArgNo == 1) {
- handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_READ, 0, Changed,
+ handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_READ, 0, Changed,
nullptr, LengthVal);
} else {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic "
<< *MI << "\n");
return indicatePessimisticFixpoint();
}
+
+ LLVM_DEBUG({
+ dbgs() << "Accesses by bin after update:\n";
+ dumpState(dbgs());
+ });
+
return Changed;
}
@@ -1954,23 +1814,23 @@ bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret,
- bool) -> bool {
- assert(AA::isValidInScope(V, Ret.getFunction()) &&
- "Assumed returned value should be valid in function scope!");
- if (ReturnedValues[&V].insert(&Ret))
- Changed = ChangeStatus::CHANGED;
- return true;
- };
-
+ SmallVector<AA::ValueAndContext> Values;
bool UsedAssumedInformation = false;
auto ReturnInstCB = [&](Instruction &I) {
ReturnInst &Ret = cast<ReturnInst>(I);
- return genericValueTraversal<ReturnInst>(
- A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
- &I, UsedAssumedInformation, /* UseValueSimplify */ true,
- /* MaxValues */ 16,
- /* StripCB */ nullptr, AA::Intraprocedural);
+ Values.clear();
+ if (!A.getAssumedSimplifiedValues(IRPosition::value(*Ret.getReturnValue()),
+ *this, Values, AA::Intraprocedural,
+ UsedAssumedInformation))
+ Values.push_back({*Ret.getReturnValue(), Ret});
+
+ for (auto &VAC : Values) {
+ assert(AA::isValidInScope(*VAC.getValue(), Ret.getFunction()) &&
+ "Assumed returned value should be valid in function scope!");
+ if (ReturnedValues[VAC.getValue()].insert(&Ret))
+ Changed = ChangeStatus::CHANGED;
+ }
+ return true;
};
// Discover returned values from all live returned instructions in the
@@ -2472,6 +2332,18 @@ struct AANonNullFloating : public AANonNullImpl {
ChangeStatus updateImpl(Attributor &A) override {
const DataLayout &DL = A.getDataLayout();
+ bool Stripped;
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+ AA::AnyScope, UsedAssumedInformation)) {
+ Values.push_back({getAssociatedValue(), getCtxI()});
+ Stripped = false;
+ } else {
+ Stripped = Values.size() != 1 ||
+ Values.front().getValue() != &getAssociatedValue();
+ }
+
DominatorTree *DT = nullptr;
AssumptionCache *AC = nullptr;
InformationCache &InfoCache = A.getInfoCache();
@@ -2480,8 +2352,8 @@ struct AANonNullFloating : public AANonNullImpl {
AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn);
}
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
- AANonNull::StateType &T, bool Stripped) -> bool {
+ AANonNull::StateType T;
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool {
const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
if (!Stripped && this == &AA) {
@@ -2495,12 +2367,9 @@ struct AANonNullFloating : public AANonNullImpl {
return T.isValidState();
};
- StateType T;
- bool UsedAssumedInformation = false;
- if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI(),
- UsedAssumedInformation))
- return indicatePessimisticFixpoint();
+ for (const auto &VAC : Values)
+ if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI()))
+ return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
}
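// The updateImpl above replaces the generic traversal with a fixed recipe:
// ask for the simplified candidate values, fall back to the associated value
// if that query fails, then fold per-value facts into one state. A standalone
// sketch of that recipe, with a hypothetical integer "value" and callbacks
// standing in for the Attributor queries:

#include <functional>
#include <optional>
#include <vector>

struct ToyNonNullState {
  bool NonNull = true;
  void meet(bool OtherNonNull) { NonNull &= OtherNonNull; }
};

static ToyNonNullState updateFromCandidates(
    int Original,
    const std::function<std::optional<std::vector<int>>(int)> &Simplify,
    const std::function<bool(int)> &IsNonNull) {
  std::vector<int> Values;
  if (std::optional<std::vector<int>> Simplified = Simplify(Original))
    Values = *Simplified;
  else
    Values.push_back(Original); // Fallback: visit the unsimplified value.

  ToyNonNullState T;
  for (int V : Values)
    T.meet(IsNonNull(V));
  return T;
}

int main() {
  auto Simplify = [](int) -> std::optional<std::vector<int>> {
    return std::vector<int>{1, 2}; // Pretend the value simplifies to {1, 2}.
  };
  auto IsNonNull = [](int V) { return V != 0; };
  return updateFromCandidates(7, Simplify, IsNonNull).NonNull ? 0 : 1;
}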
@@ -2753,8 +2622,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
if (!NoUndefAA.isKnownNoUndef())
continue;
bool UsedAssumedInformation = false;
- Optional<Value *> SimplifiedVal = A.getAssumedSimplified(
- IRPosition::value(*ArgVal), *this, UsedAssumedInformation);
+ Optional<Value *> SimplifiedVal =
+ A.getAssumedSimplified(IRPosition::value(*ArgVal), *this,
+ UsedAssumedInformation, AA::Interprocedural);
if (UsedAssumedInformation)
continue;
if (SimplifiedVal && !SimplifiedVal.value())
@@ -2925,8 +2795,9 @@ private:
Optional<Value *> stopOnUndefOrAssumed(Attributor &A, Value *V,
Instruction *I) {
bool UsedAssumedInformation = false;
- Optional<Value *> SimplifiedV = A.getAssumedSimplified(
- IRPosition::value(*V), *this, UsedAssumedInformation);
+ Optional<Value *> SimplifiedV =
+ A.getAssumedSimplified(IRPosition::value(*V), *this,
+ UsedAssumedInformation, AA::Interprocedural);
if (!UsedAssumedInformation) {
// Don't depend on assumed values.
if (!SimplifiedV) {
@@ -3369,7 +3240,9 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
}
}
- if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this))
+ if (!AA::isPotentiallyReachable(
+ A, *UserI, *getCtxI(), *this,
+ [ScopeFn](const Function &Fn) { return &Fn != ScopeFn; }))
return true;
}
@@ -4364,10 +4237,23 @@ struct AADereferenceableFloating : AADereferenceableImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
+
+ bool Stripped;
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+ AA::AnyScope, UsedAssumedInformation)) {
+ Values.push_back({getAssociatedValue(), getCtxI()});
+ Stripped = false;
+ } else {
+ Stripped = Values.size() != 1 ||
+ Values.front().getValue() != &getAssociatedValue();
+ }
+
const DataLayout &DL = A.getDataLayout();
+ DerefState T;
- auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T,
- bool Stripped) -> bool {
+ auto VisitValueCB = [&](const Value &V) -> bool {
unsigned IdxWidth =
DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
APInt Offset(IdxWidth, 0);
@@ -4420,12 +4306,9 @@ struct AADereferenceableFloating : AADereferenceableImpl {
return T.isValidState();
};
- DerefState T;
- bool UsedAssumedInformation = false;
- if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI(),
- UsedAssumedInformation))
- return indicatePessimisticFixpoint();
+ for (const auto &VAC : Values)
+ if (!VisitValueCB(*VAC.getValue()))
+ return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
}
@@ -4652,8 +4535,20 @@ struct AAAlignFloating : AAAlignImpl {
ChangeStatus updateImpl(Attributor &A) override {
const DataLayout &DL = A.getDataLayout();
- auto VisitValueCB = [&](Value &V, const Instruction *,
- AAAlign::StateType &T, bool Stripped) -> bool {
+ bool Stripped;
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+ AA::AnyScope, UsedAssumedInformation)) {
+ Values.push_back({getAssociatedValue(), getCtxI()});
+ Stripped = false;
+ } else {
+ Stripped = Values.size() != 1 ||
+ Values.front().getValue() != &getAssociatedValue();
+ }
+
+ StateType T;
+ auto VisitValueCB = [&](Value &V) -> bool {
if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V))
return true;
const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V),
@@ -4686,15 +4581,13 @@ struct AAAlignFloating : AAAlignImpl {
return T.isValidState();
};
- StateType T;
- bool UsedAssumedInformation = false;
- if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI(),
- UsedAssumedInformation))
- return indicatePessimisticFixpoint();
+ for (const auto &VAC : Values) {
+ if (!VisitValueCB(*VAC.getValue()))
+ return indicatePessimisticFixpoint();
+ }
-  // TODO: If we know we visited all incoming values, thus none are assumed
-  // dead, we can take the known information from the state T.
+    // TODO: If we know we visited all incoming values, thus none are assumed
+    // dead, we can take the known information from the state T.
return clampStateAndIndicateChange(getState(), T);
}
@@ -4941,7 +4834,9 @@ struct AAInstanceInfoImpl : public AAInstanceInfo {
return false;
// If this call base might reach the scope again we might forward the
// argument back here. This is very conservative.
- if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr))
+ if (AA::isPotentiallyReachable(
+ A, *CB, *Scope, *this,
+ [Scope](const Function &Fn) { return &Fn != Scope; }))
return false;
return true;
}
@@ -5518,9 +5413,9 @@ struct AAValueSimplifyImpl : AAValueSimplify {
if (const auto &NewV = VMap.lookup(&V))
return NewV;
bool UsedAssumedInformation = false;
- Optional<Value *> SimpleV =
- A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation);
- if (!SimpleV)
+ Optional<Value *> SimpleV = A.getAssumedSimplified(
+ V, QueryingAA, UsedAssumedInformation, AA::Interprocedural);
+ if (!SimpleV.has_value())
return PoisonValue::get(&Ty);
Value *EffectiveV = &V;
if (SimpleV.value())
@@ -5561,8 +5456,8 @@ struct AAValueSimplifyImpl : AAValueSimplify {
bool UsedAssumedInformation = false;
Optional<Value *> QueryingValueSimplified = &IRP.getAssociatedValue();
if (Simplify)
- QueryingValueSimplified =
- A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation);
+ QueryingValueSimplified = A.getAssumedSimplified(
+ IRP, QueryingAA, UsedAssumedInformation, AA::Interprocedural);
return unionAssumed(QueryingValueSimplified);
}
@@ -5763,209 +5658,11 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
indicatePessimisticFixpoint();
}
- /// Check if \p Cmp is a comparison we can simplify.
- ///
- /// We handle multiple cases, one in which at least one operand is an
- /// (assumed) nullptr. If so, try to simplify it using AANonNull on the other
- /// operand. Return true if successful, in that case SimplifiedAssociatedValue
- /// will be updated.
- bool handleCmp(Attributor &A, CmpInst &Cmp) {
- auto Union = [&](Value &V) {
- SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
- SimplifiedAssociatedValue, &V, V.getType());
- return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
- };
-
- Value *LHS = Cmp.getOperand(0);
- Value *RHS = Cmp.getOperand(1);
-
- // Simplify the operands first.
- bool UsedAssumedInformation = false;
- const auto &SimplifiedLHS =
- A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedLHS)
- return true;
- if (!SimplifiedLHS.value())
- return false;
- LHS = *SimplifiedLHS;
-
- const auto &SimplifiedRHS =
- A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedRHS)
- return true;
- if (!SimplifiedRHS.value())
- return false;
- RHS = *SimplifiedRHS;
-
- LLVMContext &Ctx = Cmp.getContext();
- // Handle the trivial case first in which we don't even need to think about
- // null or non-null.
- if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) {
- Constant *NewVal =
- ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual());
- if (!Union(*NewVal))
- return false;
- if (!UsedAssumedInformation)
- indicateOptimisticFixpoint();
- return true;
- }
-
- // From now on we only handle equalities (==, !=).
- ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp);
- if (!ICmp || !ICmp->isEquality())
- return false;
-
- bool LHSIsNull = isa<ConstantPointerNull>(LHS);
- bool RHSIsNull = isa<ConstantPointerNull>(RHS);
- if (!LHSIsNull && !RHSIsNull)
- return false;
-
- // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the
- // non-nullptr operand and if we assume it's non-null we can conclude the
- // result of the comparison.
- assert((LHSIsNull || RHSIsNull) &&
- "Expected nullptr versus non-nullptr comparison at this point");
-
- // The index is the operand that we assume is not null.
- unsigned PtrIdx = LHSIsNull;
- auto &PtrNonNullAA = A.getAAFor<AANonNull>(
- *this, IRPosition::value(*ICmp->getOperand(PtrIdx)),
- DepClassTy::REQUIRED);
- if (!PtrNonNullAA.isAssumedNonNull())
- return false;
- UsedAssumedInformation |= !PtrNonNullAA.isKnownNonNull();
-
- // The new value depends on the predicate, true for != and false for ==.
- Constant *NewVal = ConstantInt::get(
- Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_NE);
- if (!Union(*NewVal))
- return false;
-
- if (!UsedAssumedInformation)
- indicateOptimisticFixpoint();
-
- return true;
- }
-
-  /// Use the generic, non-optimistic InstSimplify functionality if we managed to
- /// simplify any operand of the instruction \p I. Return true if successful,
- /// in that case SimplifiedAssociatedValue will be updated.
- bool handleGenericInst(Attributor &A, Instruction &I) {
- bool SomeSimplified = false;
- bool UsedAssumedInformation = false;
-
- SmallVector<Value *, 8> NewOps(I.getNumOperands());
- int Idx = 0;
- for (Value *Op : I.operands()) {
- const auto &SimplifiedOp =
- A.getAssumedSimplified(IRPosition::value(*Op, getCallBaseContext()),
- *this, UsedAssumedInformation);
- // If we are not sure about any operand we are not sure about the entire
- // instruction, we'll wait.
- if (!SimplifiedOp)
- return true;
-
- if (SimplifiedOp.value())
- NewOps[Idx] = SimplifiedOp.value();
- else
- NewOps[Idx] = Op;
-
- SomeSimplified |= (NewOps[Idx] != Op);
- ++Idx;
- }
-
- // We won't bother with the InstSimplify interface if we didn't simplify any
- // operand ourselves.
- if (!SomeSimplified)
- return false;
-
- InformationCache &InfoCache = A.getInfoCache();
- Function *F = I.getFunction();
- const auto *DT =
- InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
- const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
- auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
- OptimizationRemarkEmitter *ORE = nullptr;
-
- const DataLayout &DL = I.getModule()->getDataLayout();
- SimplifyQuery Q(DL, TLI, DT, AC, &I);
- if (Value *SimplifiedI =
- simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) {
- SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
- SimplifiedAssociatedValue, SimplifiedI, I.getType());
- return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
- }
- return false;
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
-
- // Do not simplify loads that are only used in llvm.assume if we cannot also
- // remove all stores that may feed into the load. The reason is that the
- // assume is probably worth something as long as the stores are around.
- if (auto *LI = dyn_cast<LoadInst>(&getAssociatedValue())) {
- InformationCache &InfoCache = A.getInfoCache();
- if (InfoCache.isOnlyUsedByAssume(*LI)) {
- SmallSetVector<Value *, 4> PotentialCopies;
- SmallSetVector<Instruction *, 4> PotentialValueOrigins;
- bool UsedAssumedInformation = false;
- if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies,
- PotentialValueOrigins, *this,
- UsedAssumedInformation,
- /* OnlyExact */ true)) {
- if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
- if (!I)
- return true;
- if (auto *SI = dyn_cast<StoreInst>(I))
- return A.isAssumedDead(SI->getOperandUse(0), this,
- /* LivenessAA */ nullptr,
- UsedAssumedInformation,
- /* CheckBBLivenessOnly */ false);
- return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr,
- UsedAssumedInformation,
- /* CheckBBLivenessOnly */ false);
- }))
- return indicatePessimisticFixpoint();
- }
- }
- }
-
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
- bool Stripped) -> bool {
- auto &AA = A.getAAFor<AAValueSimplify>(
- *this, IRPosition::value(V, getCallBaseContext()),
- DepClassTy::REQUIRED);
- if (!Stripped && this == &AA) {
-
- if (auto *I = dyn_cast<Instruction>(&V)) {
- if (auto *Cmp = dyn_cast<CmpInst>(&V))
- if (handleCmp(A, *Cmp))
- return true;
- if (handleGenericInst(A, *I))
- return true;
- }
-      // TODO: Look at the instruction and check recursively.
-
- LLVM_DEBUG(dbgs() << "[ValueSimplify] Can't be stripped more : " << V
- << "\n");
- return false;
- }
- return checkAndUpdate(A, *this,
- IRPosition::value(V, getCallBaseContext()));
- };
-
- bool Dummy = false;
- bool UsedAssumedInformation = false;
- if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy,
- VisitValueCB, getCtxI(),
- UsedAssumedInformation,
- /* UseValueSimplify */ false))
- if (!askSimplifiedValueForOtherAAs(A))
- return indicatePessimisticFixpoint();
+ if (!askSimplifiedValueForOtherAAs(A))
+ return indicatePessimisticFixpoint();
    // If a candidate was found in this update, return CHANGED.
return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
@@ -6122,6 +5819,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
struct DeallocationInfo {
/// The call that deallocates the memory.
CallBase *const CB;
+ /// The value freed by the call.
+ Value *FreedOp;
/// Flag to indicate if we don't know all objects this deallocation might
/// free.
@@ -6153,14 +5852,14 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
CallBase *CB = dyn_cast<CallBase>(&I);
if (!CB)
return true;
- if (isFreeCall(CB, TLI)) {
- DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB};
+ if (Value *FreedOp = getFreedOperand(CB, TLI)) {
+ DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB, FreedOp};
return true;
}
// To do heap to stack, we need to know that the allocation itself is
// removable once uses are rewritten, and that we can initialize the
// alloca to the same pattern as the original allocation result.
- if (isAllocationFn(CB, TLI) && isAllocRemovable(CB, TLI)) {
+ if (isRemovableAlloc(CB, TLI)) {
auto *I8Ty = Type::getInt8Ty(CB->getParent()->getContext());
if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) {
AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB};
@@ -6427,44 +6126,36 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
/* CheckBBLivenessOnly */ true))
continue;
- // Use the optimistic version to get the freed objects, ignoring dead
- // branches etc.
- SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
- *this, DI.CB,
- UsedAssumedInformation)) {
- LLVM_DEBUG(
- dbgs()
- << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
+ // Use the non-optimistic version to get the freed object.
+ Value *Obj = getUnderlyingObject(DI.FreedOp);
+ if (!Obj) {
+ LLVM_DEBUG(dbgs() << "[H2S] Unknown underlying object for free!\n");
DI.MightFreeUnknownObjects = true;
continue;
}
- // Check each object explicitly.
- for (auto *Obj : Objects) {
- // Free of null and undef can be ignored as no-ops (or UB in the latter
- // case).
- if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj))
- continue;
-
- CallBase *ObjCB = dyn_cast<CallBase>(Obj);
- if (!ObjCB) {
- LLVM_DEBUG(dbgs()
- << "[H2S] Free of a non-call object: " << *Obj << "\n");
- DI.MightFreeUnknownObjects = true;
- continue;
- }
+ // Free of null and undef can be ignored as no-ops (or UB in the latter
+ // case).
+ if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj))
+ continue;
- AllocationInfo *AI = AllocationInfos.lookup(ObjCB);
- if (!AI) {
- LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj
- << "\n");
- DI.MightFreeUnknownObjects = true;
- continue;
- }
+ CallBase *ObjCB = dyn_cast<CallBase>(Obj);
+ if (!ObjCB) {
+ LLVM_DEBUG(dbgs() << "[H2S] Free of a non-call object: " << *Obj
+ << "\n");
+ DI.MightFreeUnknownObjects = true;
+ continue;
+ }
- DI.PotentialAllocationCalls.insert(ObjCB);
+ AllocationInfo *AI = AllocationInfos.lookup(ObjCB);
+ if (!AI) {
+ LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj
+ << "\n");
+ DI.MightFreeUnknownObjects = true;
+ continue;
}
+
+ DI.PotentialAllocationCalls.insert(ObjCB);
}
};
@@ -7692,7 +7383,7 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use &U,
const Instruction *UserI) {
// The loaded value is unrelated to the pointer argument, no need to
// follow the users of the load.
- if (isa<LoadInst>(UserI))
+ if (isa<LoadInst>(UserI) || isa<ReturnInst>(UserI))
return false;
// By default we follow all uses assuming UserI might leak information on U,
@@ -7822,16 +7513,15 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
: AAMemoryLocation(IRP, A), Allocator(A.Allocator) {
- for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u)
- AccessKind2Accesses[u] = nullptr;
+ AccessKind2Accesses.fill(nullptr);
}
~AAMemoryLocationImpl() {
// The AccessSets are allocated via a BumpPtrAllocator, we call
// the destructor manually.
- for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u)
- if (AccessKind2Accesses[u])
- AccessKind2Accesses[u]->~AccessSet();
+ for (AccessSet *AS : AccessKind2Accesses)
+ if (AS)
+ AS->~AccessSet();
}
/// See AbstractAttribute::initialize(...).
@@ -7999,7 +7689,7 @@ protected:
/// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the
/// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind.
using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>;
- AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()];
+ std::array<AccessSet *, llvm::CTLog2<VALID_STATE>()> AccessKind2Accesses;
/// Categorize the pointer arguments of CB that might access memory in
/// AccessedLoc and update the state and access map accordingly.
@@ -8061,7 +7751,7 @@ void AAMemoryLocationImpl::categorizePtrValue(
<< Ptr << " ["
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
- SmallVector<Value *, 8> Objects;
+ SmallSetVector<Value *, 8> Objects;
bool UsedAssumedInformation = false;
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
UsedAssumedInformation,
@@ -8670,19 +8360,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
// Simplify the operands first.
bool UsedAssumedInformation = false;
- const auto &SimplifiedLHS =
- A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedLHS)
+ const auto &SimplifiedLHS = A.getAssumedSimplified(
+ IRPosition::value(*LHS, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Interprocedural);
+ if (!SimplifiedLHS.has_value())
return true;
if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
- const auto &SimplifiedRHS =
- A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedRHS)
+ const auto &SimplifiedRHS = A.getAssumedSimplified(
+ IRPosition::value(*RHS, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Interprocedural);
+ if (!SimplifiedRHS.has_value())
return true;
if (!SimplifiedRHS.value())
return false;
@@ -8723,10 +8413,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
// Simplify the operand first.
bool UsedAssumedInformation = false;
- const auto &SimplifiedOpV =
- A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedOpV)
+ const auto &SimplifiedOpV = A.getAssumedSimplified(
+ IRPosition::value(*OpV, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Interprocedural);
+ if (!SimplifiedOpV.has_value())
return true;
if (!SimplifiedOpV.value())
return false;
@@ -8753,19 +8443,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
// Simplify the operands first.
bool UsedAssumedInformation = false;
- const auto &SimplifiedLHS =
- A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedLHS)
+ const auto &SimplifiedLHS = A.getAssumedSimplified(
+ IRPosition::value(*LHS, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Interprocedural);
+ if (!SimplifiedLHS.has_value())
return true;
if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
- const auto &SimplifiedRHS =
- A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedRHS)
+ const auto &SimplifiedRHS = A.getAssumedSimplified(
+ IRPosition::value(*RHS, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Interprocedural);
+ if (!SimplifiedRHS.has_value())
return true;
if (!SimplifiedRHS.value())
return false;
@@ -8820,17 +8510,18 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
- IntegerRangeState &T, bool Stripped) -> bool {
+
+ IntegerRangeState T(getBitWidth());
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool {
Instruction *I = dyn_cast<Instruction>(&V);
if (!I || isa<CallBase>(I)) {
// Simplify the operand first.
bool UsedAssumedInformation = false;
- const auto &SimplifiedOpV =
- A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedOpV)
+ const auto &SimplifiedOpV = A.getAssumedSimplified(
+ IRPosition::value(V, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Interprocedural);
+ if (!SimplifiedOpV.has_value())
return true;
if (!SimplifiedOpV.value())
return false;
@@ -8880,13 +8571,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
return T.isValidState();
};
- IntegerRangeState T(getBitWidth());
-
- bool UsedAssumedInformation = false;
- if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI(),
- UsedAssumedInformation,
- /* UseValueSimplify */ false))
+ if (!VisitValueCB(getAssociatedValue(), getCtxI()))
return indicatePessimisticFixpoint();
// Ensure that long def-use chains can't cause circular reasoning either by
@@ -8998,6 +8683,36 @@ struct AAPotentialConstantValuesImpl : AAPotentialConstantValues {
AAPotentialConstantValues::initialize(A);
}
+ bool fillSetWithConstantValues(Attributor &A, const IRPosition &IRP, SetTy &S,
+ bool &ContainsUndef) {
+ SmallVector<AA::ValueAndContext> Values;
+ bool UsedAssumedInformation = false;
+ if (!A.getAssumedSimplifiedValues(IRP, *this, Values, AA::Interprocedural,
+ UsedAssumedInformation)) {
+ if (!IRP.getAssociatedType()->isIntegerTy())
+ return false;
+ auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, IRP, DepClassTy::REQUIRED);
+ if (!PotentialValuesAA.getState().isValidState())
+ return false;
+ ContainsUndef = PotentialValuesAA.getState().undefIsContained();
+ S = PotentialValuesAA.getState().getAssumedSet();
+ return true;
+ }
+
+ for (auto &It : Values) {
+ if (isa<UndefValue>(It.getValue()))
+ continue;
+ auto *CI = dyn_cast<ConstantInt>(It.getValue());
+ if (!CI)
+ return false;
+ S.insert(CI->getValue());
+ }
+ ContainsUndef = S.empty();
+
+ return true;
+ }
+
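// fillSetWithConstantValues above produces a constant set plus a
// ContainsUndef flag per operand. A standalone sketch, with int64_t standing
// in for APInt, of how two such results are typically combined for a binary
// operator below: an operand that only contained undef is treated as zero,
// otherwise every pair of potential operands contributes one result.

#include <cstdint>
#include <set>

static std::set<int64_t> combineAdd(const std::set<int64_t> &LHS,
                                    bool LHSContainsUndef,
                                    const std::set<int64_t> &RHS,
                                    bool RHSContainsUndef) {
  const std::set<int64_t> Zero = {0};
  const std::set<int64_t> &L = LHSContainsUndef ? Zero : LHS;
  const std::set<int64_t> &R = RHSContainsUndef ? Zero : RHS;
  std::set<int64_t> Result;
  for (int64_t A : L)
    for (int64_t B : R)
      Result.insert(A + B); // One entry per pair of potential operands.
  return Result;
}

int main() {
  // {1, 2} + undef is folded as {1, 2} + {0} = {1, 2}.
  return combineAdd({1, 2}, false, {}, true).size() == 2 ? 0 : 1;
}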
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
std::string Str;
@@ -9186,50 +8901,22 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
- // Simplify the operands first.
- bool UsedAssumedInformation = false;
- const auto &SimplifiedLHS =
- A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedLHS)
- return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.value())
- return indicatePessimisticFixpoint();
- LHS = *SimplifiedLHS;
-
- const auto &SimplifiedRHS =
- A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedRHS)
- return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.value())
- return indicatePessimisticFixpoint();
- RHS = *SimplifiedRHS;
-
- if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
- auto &LHSAA = A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
- if (!LHSAA.isValidState())
+ bool LHSContainsUndef = false, RHSContainsUndef = false;
+ SetTy LHSAAPVS, RHSAAPVS;
+ if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS,
+ LHSContainsUndef) ||
+ !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS,
+ RHSContainsUndef))
return indicatePessimisticFixpoint();
- auto &RHSAA = A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
- if (!RHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- const SetTy &LHSAAPVS = LHSAA.getAssumedSet();
- const SetTy &RHSAAPVS = RHSAA.getAssumedSet();
-
// TODO: make use of undef flag to limit potential values aggressively.
bool MaybeTrue = false, MaybeFalse = false;
const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0);
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
+ if (LHSContainsUndef && RHSContainsUndef) {
// The result of any comparison between undefs can be soundly replaced
// with undef.
unionAssumedWithUndef();
- } else if (LHSAA.undefIsContained()) {
+ } else if (LHSContainsUndef) {
for (const APInt &R : RHSAAPVS) {
bool CmpResult = calculateICmpInst(ICI, Zero, R);
MaybeTrue |= CmpResult;
@@ -9237,7 +8924,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
if (MaybeTrue & MaybeFalse)
return indicatePessimisticFixpoint();
}
- } else if (RHSAA.undefIsContained()) {
+ } else if (RHSContainsUndef) {
for (const APInt &L : LHSAAPVS) {
bool CmpResult = calculateICmpInst(ICI, L, Zero);
MaybeTrue |= CmpResult;
@@ -9269,29 +8956,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
Value *LHS = SI->getTrueValue();
Value *RHS = SI->getFalseValue();
- // Simplify the operands first.
bool UsedAssumedInformation = false;
- const auto &SimplifiedLHS =
- A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedLHS)
- return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.value())
- return indicatePessimisticFixpoint();
- LHS = *SimplifiedLHS;
-
- const auto &SimplifiedRHS =
- A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedRHS)
- return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.value())
- return indicatePessimisticFixpoint();
- RHS = *SimplifiedRHS;
-
- if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
Optional<Constant *> C = A.getAssumedConstant(*SI->getCondition(), *this,
UsedAssumedInformation);
@@ -9302,35 +8967,36 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
else if (C && *C && (*C)->isZeroValue())
OnlyRight = true;
- const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr;
- if (!OnlyRight) {
- LHSAA = &A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
- if (!LHSAA->isValidState())
- return indicatePessimisticFixpoint();
- }
- if (!OnlyLeft) {
- RHSAA = &A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
- if (!RHSAA->isValidState())
- return indicatePessimisticFixpoint();
- }
+ bool LHSContainsUndef = false, RHSContainsUndef = false;
+ SetTy LHSAAPVS, RHSAAPVS;
+ if (!OnlyRight && !fillSetWithConstantValues(A, IRPosition::value(*LHS),
+ LHSAAPVS, LHSContainsUndef))
+ return indicatePessimisticFixpoint();
+
+ if (!OnlyLeft && !fillSetWithConstantValues(A, IRPosition::value(*RHS),
+ RHSAAPVS, RHSContainsUndef))
+ return indicatePessimisticFixpoint();
- if (!LHSAA || !RHSAA) {
+ if (OnlyLeft || OnlyRight) {
// select (true/false), lhs, rhs
- auto *OpAA = LHSAA ? LHSAA : RHSAA;
+ auto *OpAA = OnlyLeft ? &LHSAAPVS : &RHSAAPVS;
+ auto Undef = OnlyLeft ? LHSContainsUndef : RHSContainsUndef;
- if (OpAA->undefIsContained())
+ if (Undef)
unionAssumedWithUndef();
- else
- unionAssumed(*OpAA);
+ else {
+ for (auto &It : *OpAA)
+ unionAssumed(It);
+ }
- } else if (LHSAA->undefIsContained() && RHSAA->undefIsContained()) {
+ } else if (LHSContainsUndef && RHSContainsUndef) {
// select i1 *, undef , undef => undef
unionAssumedWithUndef();
} else {
- unionAssumed(*LHSAA);
- unionAssumed(*RHSAA);
+ for (auto &It : LHSAAPVS)
+ unionAssumed(It);
+ for (auto &It : RHSAAPVS)
+ unionAssumed(It);
}
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
@@ -9344,26 +9010,16 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth();
Value *Src = CI->getOperand(0);
- // Simplify the operand first.
- bool UsedAssumedInformation = false;
- const auto &SimplifiedSrc =
- A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedSrc)
- return ChangeStatus::UNCHANGED;
- if (!SimplifiedSrc.value())
+ bool SrcContainsUndef = false;
+ SetTy SrcPVS;
+ if (!fillSetWithConstantValues(A, IRPosition::value(*Src), SrcPVS,
+ SrcContainsUndef))
return indicatePessimisticFixpoint();
- Src = *SimplifiedSrc;
- auto &SrcAA = A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*Src), DepClassTy::REQUIRED);
- if (!SrcAA.isValidState())
- return indicatePessimisticFixpoint();
- const SetTy &SrcAAPVS = SrcAA.getAssumedSet();
- if (SrcAA.undefIsContained())
+ if (SrcContainsUndef)
unionAssumedWithUndef();
else {
- for (const APInt &S : SrcAAPVS) {
+ for (const APInt &S : SrcPVS) {
APInt T = calculateCastInst(CI, S, ResultBitWidth);
unionAssumed(T);
}
@@ -9377,53 +9033,26 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
- // Simplify the operands first.
- bool UsedAssumedInformation = false;
- const auto &SimplifiedLHS =
- A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedLHS)
- return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.value())
+ bool LHSContainsUndef = false, RHSContainsUndef = false;
+ SetTy LHSAAPVS, RHSAAPVS;
+ if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS,
+ LHSContainsUndef) ||
+ !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS,
+ RHSContainsUndef))
return indicatePessimisticFixpoint();
- LHS = *SimplifiedLHS;
- const auto &SimplifiedRHS =
- A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
- *this, UsedAssumedInformation);
- if (!SimplifiedRHS)
- return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.value())
- return indicatePessimisticFixpoint();
- RHS = *SimplifiedRHS;
-
- if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
- return indicatePessimisticFixpoint();
-
- auto &LHSAA = A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*LHS), DepClassTy::REQUIRED);
- if (!LHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- auto &RHSAA = A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*RHS), DepClassTy::REQUIRED);
- if (!RHSAA.isValidState())
- return indicatePessimisticFixpoint();
-
- const SetTy &LHSAAPVS = LHSAA.getAssumedSet();
- const SetTy &RHSAAPVS = RHSAA.getAssumedSet();
const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0);
// TODO: make use of undef flag to limit potential values aggressively.
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
+ if (LHSContainsUndef && RHSContainsUndef) {
if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero))
return indicatePessimisticFixpoint();
- } else if (LHSAA.undefIsContained()) {
+ } else if (LHSContainsUndef) {
for (const APInt &R : RHSAAPVS) {
if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R))
return indicatePessimisticFixpoint();
}
- } else if (RHSAA.undefIsContained()) {
+ } else if (RHSContainsUndef) {
for (const APInt &L : LHSAAPVS) {
if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero))
return indicatePessimisticFixpoint();
@@ -9440,35 +9069,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
: ChangeStatus::CHANGED;
}
- ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) {
- auto AssumedBefore = getAssumed();
- for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
- Value *IncomingValue = PHI->getIncomingValue(u);
-
- // Simplify the operand first.
- bool UsedAssumedInformation = false;
- const auto &SimplifiedIncomingValue = A.getAssumedSimplified(
- IRPosition::value(*IncomingValue, getCallBaseContext()), *this,
- UsedAssumedInformation);
- if (!SimplifiedIncomingValue)
- continue;
- if (!SimplifiedIncomingValue.value())
- return indicatePessimisticFixpoint();
- IncomingValue = *SimplifiedIncomingValue;
-
- auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED);
- if (!PotentialValuesAA.isValidState())
- return indicatePessimisticFixpoint();
- if (PotentialValuesAA.undefIsContained())
- unionAssumedWithUndef();
- else
- unionAssumed(PotentialValuesAA.getAssumed());
- }
- return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
@@ -9486,9 +9086,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
if (auto *BinOp = dyn_cast<BinaryOperator>(I))
return updateWithBinaryOperator(A, BinOp);
- if (auto *PHI = dyn_cast<PHINode>(I))
- return updateWithPHINode(A, PHI);
-
return indicatePessimisticFixpoint();
}
@@ -9642,7 +9239,8 @@ struct AANoUndefImpl : AANoUndef {
// A position whose simplified value does not have any value is
// considered to be dead. We don't manifest noundef in such positions for
// the same reason above.
- if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation)
+ if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation,
+ AA::Interprocedural)
.has_value())
return ChangeStatus::UNCHANGED;
return AANoUndef::manifest(A);
@@ -9663,11 +9261,19 @@ struct AANoUndefFloating : public AANoUndefImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
- AANoUndef::StateType &T, bool Stripped) -> bool {
+
+ SmallVector<AA::ValueAndContext> Values;
+ bool UsedAssumedInformation = false;
+ if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+ AA::AnyScope, UsedAssumedInformation)) {
+ Values.push_back({getAssociatedValue(), getCtxI()});
+ }
+
+ StateType T;
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool {
const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
- if (!Stripped && this == &AA) {
+ if (this == &AA) {
T.indicatePessimisticFixpoint();
} else {
const AANoUndef::StateType &S =
@@ -9677,12 +9283,9 @@ struct AANoUndefFloating : public AANoUndefImpl {
return T.isValidState();
};
- StateType T;
- bool UsedAssumedInformation = false;
- if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI(),
- UsedAssumedInformation))
- return indicatePessimisticFixpoint();
+ for (const auto &VAC : Values)
+ if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI()))
+ return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
}
@@ -9782,8 +9385,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = ChangeStatus::UNCHANGED;
- auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown,
- bool Stripped) -> bool {
+ auto VisitValue = [&](Value &V, const Instruction *CtxI) -> bool {
if (Function *Fn = dyn_cast<Function>(&V)) {
addCalledFunction(Fn, Change);
} else {
@@ -9795,17 +9397,17 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
return true;
};
+ SmallVector<AA::ValueAndContext> Values;
// Process any value that we might call.
- auto ProcessCalledOperand = [&](Value *V) {
- bool DummyValue = false;
+ auto ProcessCalledOperand = [&](Value *V, Instruction *CtxI) {
bool UsedAssumedInformation = false;
- if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
- DummyValue, VisitValue, nullptr,
- UsedAssumedInformation, false)) {
- // If we haven't gone through all values, assume that there are unknown
- // callees.
- setHasUnknownCallee(true, Change);
+ Values.clear();
+ if (!A.getAssumedSimplifiedValues(IRPosition::value(*V), *this, Values,
+ AA::AnyScope, UsedAssumedInformation)) {
+ Values.push_back({*V, CtxI});
}
+ for (auto &VAC : Values)
+ VisitValue(*VAC.getValue(), VAC.getCtxI());
};
CallBase *CB = cast<CallBase>(getCtxI());
@@ -9828,13 +9430,13 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
}
// The most simple case.
- ProcessCalledOperand(CB->getCalledOperand());
+ ProcessCalledOperand(CB->getCalledOperand(), CB);
// Process callback functions.
SmallVector<const Use *, 4u> CallbackUses;
AbstractCallSite::getCallbackUses(*CB, CallbackUses);
for (const Use *U : CallbackUses)
- ProcessCalledOperand(U->get());
+ ProcessCalledOperand(U->get(), CB);
return Change;
}
@@ -9920,8 +9522,11 @@ private:
for (auto *AAEdges : AAEdgesList) {
if (AAEdges->hasUnknownCallee()) {
- if (!CanReachUnknownCallee)
+ if (!CanReachUnknownCallee) {
+ LLVM_DEBUG(dbgs()
+ << "[QueryResolver] Edges include unknown callee!\n");
Change = ChangeStatus::CHANGED;
+ }
CanReachUnknownCallee = true;
return Change;
}
@@ -10065,14 +9670,10 @@ public:
}
bool instructionCanReach(Attributor &A, const Instruction &Inst,
- const Function &Fn,
- bool UseBackwards) const override {
+ const Function &Fn) const override {
if (!isValidState())
return true;
- if (UseBackwards)
- return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr);
-
const auto &Reachability = A.getAAFor<AAReachability>(
*this, IRPosition::function(*getAssociatedFunction()),
DepClassTy::REQUIRED);
@@ -10085,8 +9686,11 @@ public:
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst];
- if (!AllKnown)
+ if (!AllKnown) {
+ LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges known, "
+ "may reach unknown callee!\n");
InstQSet.CanReachUnknownCallee = true;
+ }
return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn);
}
@@ -10119,8 +9723,11 @@ public:
bool AllKnown =
getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
// Update will return a change if this affects any queries.
- if (!AllKnown)
+ if (!AllKnown) {
+ LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges "
+ "known, may reach unknown callee!\n");
InstPair.second.CanReachUnknownCallee = true;
+ }
Change |= InstPair.second.update(A, *this, CallEdges);
}
}
@@ -10133,8 +9740,11 @@ public:
WholeFunction.Reachable.size() + WholeFunction.Unreachable.size();
return "FunctionReachability [" +
- std::to_string(WholeFunction.Reachable.size()) + "," +
- std::to_string(QueryCount) + "]";
+ (canReachUnknownCallee()
+ ? "unknown"
+ : (std::to_string(WholeFunction.Reachable.size()) + "," +
+ std::to_string(QueryCount))) +
+ "]";
}
void trackStatistics() const override {}
@@ -10156,6 +9766,822 @@ private:
};
} // namespace
+template <typename AAType>
+static Optional<Constant *>
+askForAssumedConstant(Attributor &A, const AbstractAttribute &QueryingAA,
+ const IRPosition &IRP, Type &Ty) {
+ if (!Ty.isIntegerTy())
+ return nullptr;
+
+ // This will also pass the call base context.
+ const auto &AA = A.getAAFor<AAType>(QueryingAA, IRP, DepClassTy::NONE);
+
+ Optional<Constant *> COpt = AA.getAssumedConstant(A);
+
+ if (!COpt.has_value()) {
+ A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL);
+ return llvm::None;
+ }
+ if (auto *C = COpt.value()) {
+ A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL);
+ return C;
+ }
+ return nullptr;
+}
+
+Value *AAPotentialValues::getSingleValue(
+ Attributor &A, const AbstractAttribute &AA, const IRPosition &IRP,
+ SmallVectorImpl<AA::ValueAndContext> &Values) {
+ Type &Ty = *IRP.getAssociatedType();
+ Optional<Value *> V;
+ for (auto &It : Values) {
+ V = AA::combineOptionalValuesInAAValueLatice(V, It.getValue(), &Ty);
+ if (V.has_value() && !V.value())
+ break;
+ }
+ if (!V.has_value())
+ return UndefValue::get(&Ty);
+ return V.value();
+}
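// Informal reading of getSingleValue above (a note, not part of the patch):
// an empty candidate set folds to undef, a set in which every candidate is
// the same value folds to that value, and any disagreement folds to nullptr,
// i.e. "no single value".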
+
+namespace {
+struct AAPotentialValuesImpl : AAPotentialValues {
+ using StateType = PotentialLLVMValuesState;
+
+ AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValues(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ if (A.hasSimplificationCallback(getIRPosition())) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ Value *Stripped = getAssociatedValue().stripPointerCasts();
+ if (isa<Constant>(Stripped)) {
+ addValue(A, getState(), *Stripped, getCtxI(), AA::AnyScope,
+ getAnchorScope());
+ indicateOptimisticFixpoint();
+ return;
+ }
+ AAPotentialValues::initialize(A);
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ std::string Str;
+ llvm::raw_string_ostream OS(Str);
+ OS << getState();
+ return OS.str();
+ }
+
+ template <typename AAType>
+ static Optional<Value *> askOtherAA(Attributor &A,
+ const AbstractAttribute &AA,
+ const IRPosition &IRP, Type &Ty) {
+ if (isa<Constant>(IRP.getAssociatedValue()))
+ return &IRP.getAssociatedValue();
+ Optional<Constant *> C = askForAssumedConstant<AAType>(A, AA, IRP, Ty);
+ if (!C)
+ return llvm::None;
+ if (C.value())
+ if (auto *CC = AA::getWithType(**C, Ty))
+ return CC;
+ return nullptr;
+ }
+
+ void addValue(Attributor &A, StateType &State, Value &V,
+ const Instruction *CtxI, AA::ValueScope S,
+ Function *AnchorScope) const {
+
+ IRPosition ValIRP = IRPosition::value(V);
+ if (auto *CB = dyn_cast_or_null<CallBase>(CtxI)) {
+ for (auto &U : CB->args()) {
+ if (U.get() != &V)
+ continue;
+ ValIRP = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U));
+ break;
+ }
+ }
+
+ Value *VPtr = &V;
+ if (ValIRP.getAssociatedType()->isIntegerTy()) {
+ Type &Ty = *getAssociatedType();
+ Optional<Value *> SimpleV =
+ askOtherAA<AAValueConstantRange>(A, *this, ValIRP, Ty);
+ if (SimpleV.has_value() && !SimpleV.value()) {
+ auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
+ *this, ValIRP, DepClassTy::OPTIONAL);
+ if (PotentialConstantsAA.isValidState()) {
+ for (auto &It : PotentialConstantsAA.getAssumedSet()) {
+ State.unionAssumed({{*ConstantInt::get(&Ty, It), nullptr}, S});
+ }
+ assert(!PotentialConstantsAA.undefIsContained() &&
+ "Undef should be an explicit value!");
+ return;
+ }
+ }
+ if (!SimpleV.has_value())
+ return;
+
+ if (SimpleV.value())
+ VPtr = SimpleV.value();
+ }
+
+ if (isa<ConstantInt>(VPtr))
+ CtxI = nullptr;
+ if (!AA::isValidInScope(*VPtr, AnchorScope))
+ S = AA::ValueScope(S | AA::Interprocedural);
+
+ State.unionAssumed({{*VPtr, CtxI}, S});
+ }
+
+ /// Helper struct to tie a value+context pair together with the scope for
+ /// which this is the simplified version.
+ struct ItemInfo {
+ AA::ValueAndContext I;
+ AA::ValueScope S;
+ };
+
+ bool recurseForValue(Attributor &A, const IRPosition &IRP, AA::ValueScope S) {
+ SmallMapVector<AA::ValueAndContext, int, 8> ValueScopeMap;
+ for (auto CS : {AA::Intraprocedural, AA::Interprocedural}) {
+ if (!(CS & S))
+ continue;
+
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(IRP, this, Values, CS,
+ UsedAssumedInformation))
+ return false;
+
+ for (auto &It : Values)
+ ValueScopeMap[It] += CS;
+ }
+ for (auto &It : ValueScopeMap)
+ addValue(A, getState(), *It.first.getValue(), It.first.getCtxI(),
+ AA::ValueScope(It.second), getAnchorScope());
+
+ return true;
+ }
+
+ void giveUpOnIntraprocedural(Attributor &A) {
+ auto NewS = StateType::getBestState(getState());
+ for (auto &It : getAssumedSet()) {
+ if (It.second == AA::Intraprocedural)
+ continue;
+ addValue(A, NewS, *It.first.getValue(), It.first.getCtxI(),
+ AA::Interprocedural, getAnchorScope());
+ }
+ assert(!undefIsContained() && "Undef should be an explicit value!");
+ addValue(A, NewS, getAssociatedValue(), getCtxI(), AA::Intraprocedural,
+ getAnchorScope());
+ getState() = NewS;
+ }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...).
+ ChangeStatus indicatePessimisticFixpoint() override {
+ getState() = StateType::getBestState(getState());
+ getState().unionAssumed({{getAssociatedValue(), getCtxI()}, AA::AnyScope});
+ AAPotentialValues::indicateOptimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ return indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ SmallVector<AA::ValueAndContext> Values;
+ for (AA::ValueScope S : {AA::Interprocedural, AA::Intraprocedural}) {
+ Values.clear();
+ if (!getAssumedSimplifiedValues(A, Values, S))
+ continue;
+ Value &OldV = getAssociatedValue();
+ if (isa<UndefValue>(OldV))
+ continue;
+ Value *NewV = getSingleValue(A, *this, getIRPosition(), Values);
+ if (!NewV || NewV == &OldV)
+ continue;
+ if (getCtxI() &&
+ !AA::isValidAtPosition({*NewV, *getCtxI()}, A.getInfoCache()))
+ continue;
+ if (A.changeAfterManifest(getIRPosition(), *NewV))
+ return ChangeStatus::CHANGED;
+ }
+ return ChangeStatus::UNCHANGED;
+ }
+
+ bool getAssumedSimplifiedValues(Attributor &A,
+ SmallVectorImpl<AA::ValueAndContext> &Values,
+ AA::ValueScope S) const override {
+ if (!isValidState())
+ return false;
+ for (auto &It : getAssumedSet())
+ if (It.second & S)
+ Values.push_back(It.first);
+ assert(!undefIsContained() && "Undef should be an explicit value!");
+ return true;
+ }
+};
+
+struct AAPotentialValuesFloating : AAPotentialValuesImpl {
+ AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto AssumedBefore = getAssumed();
+
+ genericValueTraversal(A);
+
+ return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// Helper struct to remember which AAIsDead instances we actually used.
+ struct LivenessInfo {
+ const AAIsDead *LivenessAA = nullptr;
+ bool AnyDead = false;
+ };
+
+ /// Check if \p Cmp is a comparison we can simplify.
+ ///
+ /// We handle multiple cases, one in which at least one operand is an
+ /// (assumed) nullptr. If so, try to simplify it using AANonNull on the other
+ /// operand. Return true if successful, in that case Worklist will be updated.
+ bool handleCmp(Attributor &A, CmpInst &Cmp, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist) {
+ Value *LHS = Cmp.getOperand(0);
+ Value *RHS = Cmp.getOperand(1);
+
+ // Simplify the operands first.
+ bool UsedAssumedInformation = false;
+ const auto &SimplifiedLHS = A.getAssumedSimplified(
+ IRPosition::value(*LHS, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Intraprocedural);
+ if (!SimplifiedLHS.has_value())
+ return true;
+ if (!SimplifiedLHS.value())
+ return false;
+ LHS = *SimplifiedLHS;
+
+ const auto &SimplifiedRHS = A.getAssumedSimplified(
+ IRPosition::value(*RHS, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Intraprocedural);
+ if (!SimplifiedRHS.has_value())
+ return true;
+ if (!SimplifiedRHS.value())
+ return false;
+ RHS = *SimplifiedRHS;
+
+ LLVMContext &Ctx = Cmp.getContext();
+ // Handle the trivial case first in which we don't even need to think about
+ // null or non-null.
+ if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) {
+ Constant *NewV =
+ ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual());
+ addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S,
+ getAnchorScope());
+ return true;
+ }
+
+ // From now on we only handle equalities (==, !=).
+ ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp);
+ if (!ICmp || !ICmp->isEquality())
+ return false;
+
+ bool LHSIsNull = isa<ConstantPointerNull>(LHS);
+ bool RHSIsNull = isa<ConstantPointerNull>(RHS);
+ if (!LHSIsNull && !RHSIsNull)
+ return false;
+
+ // What is left is the nullptr ==/!= non-nullptr case. We'll use AANonNull
+ // on the non-nullptr operand; if we assume it is non-null we can conclude
+ // the result of the comparison.
+ assert((LHSIsNull || RHSIsNull) &&
+ "Expected nullptr versus non-nullptr comparison at this point");
+
+ // The index is the operand that we assume is not null.
+ unsigned PtrIdx = LHSIsNull;
+ auto &PtrNonNullAA = A.getAAFor<AANonNull>(
+ *this, IRPosition::value(*ICmp->getOperand(PtrIdx)),
+ DepClassTy::REQUIRED);
+ if (!PtrNonNullAA.isAssumedNonNull())
+ return false;
+
+ // The new value depends on the predicate, true for != and false for ==.
+ Constant *NewV = ConstantInt::get(Type::getInt1Ty(Ctx),
+ ICmp->getPredicate() == CmpInst::ICMP_NE);
+ addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, getAnchorScope());
+ return true;
+ }
+
+ bool handleSelectInst(Attributor &A, SelectInst &SI, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist) {
+ const Instruction *CtxI = II.I.getCtxI();
+ bool UsedAssumedInformation = false;
+
+ Optional<Constant *> C =
+ A.getAssumedConstant(*SI.getCondition(), *this, UsedAssumedInformation);
+ bool NoValueYet = !C.has_value();
+ if (NoValueYet || isa_and_nonnull<UndefValue>(*C))
+ return true;
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) {
+ if (CI->isZero())
+ Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S});
+ else
+ Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S});
+ } else {
+ // We could not simplify the condition, assume both values.
+ Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S});
+ Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S});
+ }
+ return true;
+ }
+
+ bool handleLoadInst(Attributor &A, LoadInst &LI, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist) {
+ SmallSetVector<Value *, 4> PotentialCopies;
+ SmallSetVector<Instruction *, 4> PotentialValueOrigins;
+ bool UsedAssumedInformation = false;
+ if (!AA::getPotentiallyLoadedValues(A, LI, PotentialCopies,
+ PotentialValueOrigins, *this,
+ UsedAssumedInformation,
+ /* OnlyExact */ true)) {
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] Failed to get potentially "
+ "loaded values for load instruction "
+ << LI << "\n");
+ return false;
+ }
+
+ // Do not simplify loads that are only used in llvm.assume if we cannot also
+ // remove all stores that may feed into the load. The reason is that the
+ // assume is probably worth something as long as the stores are around.
+ InformationCache &InfoCache = A.getInfoCache();
+ if (InfoCache.isOnlyUsedByAssume(LI)) {
+ if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
+ if (!I)
+ return true;
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return A.isAssumedDead(SI->getOperandUse(0), this,
+ /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false);
+ })) {
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] Load is only used by assumes "
+ "and we cannot delete all the stores: "
+ << LI << "\n");
+ return false;
+ }
+ }
+
+ // Values have to be dynamically unique or we lose the fact that a
+ // single llvm::Value might represent two runtime values (e.g.,
+ // stack locations in different recursive calls).
+ const Instruction *CtxI = II.I.getCtxI();
+ bool ScopeIsLocal = (II.S & AA::Intraprocedural);
+ bool AllLocal = ScopeIsLocal;
+ bool DynamicallyUnique = llvm::all_of(PotentialCopies, [&](Value *PC) {
+ AllLocal &= AA::isValidInScope(*PC, getAnchorScope());
+ return AA::isDynamicallyUnique(A, *this, *PC);
+ });
+ if (!DynamicallyUnique) {
+ LLVM_DEBUG(dbgs() << "[AAPotentialValues] Not all potentially loaded "
+ "values are dynamically unique: "
+ << LI << "\n");
+ return false;
+ }
+
+ for (auto *PotentialCopy : PotentialCopies) {
+ if (AllLocal) {
+ Worklist.push_back({{*PotentialCopy, CtxI}, II.S});
+ } else {
+ Worklist.push_back({{*PotentialCopy, CtxI}, AA::Interprocedural});
+ }
+ }
+ if (!AllLocal && ScopeIsLocal)
+ addValue(A, getState(), LI, CtxI, AA::Intraprocedural, getAnchorScope());
+ return true;
+ }
+
+ bool handlePHINode(
+ Attributor &A, PHINode &PHI, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist,
+ SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) {
+ auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & {
+ LivenessInfo &LI = LivenessAAs[&F];
+ if (!LI.LivenessAA)
+ LI.LivenessAA = &A.getAAFor<AAIsDead>(*this, IRPosition::function(F),
+ DepClassTy::NONE);
+ return LI;
+ };
+
+ LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction());
+ for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) {
+ BasicBlock *IncomingBB = PHI.getIncomingBlock(u);
+ if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) {
+ LI.AnyDead = true;
+ continue;
+ }
+ Worklist.push_back(
+ {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S});
+ }
+ return true;
+ }
+
+ /// Use the generic, non-optimistic InstSimplify functionality if we managed
+ /// to simplify any operand of the instruction \p I. Return true if
+ /// successful; in that case Worklist will be updated.
+ bool handleGenericInst(Attributor &A, Instruction &I, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist) {
+ bool SomeSimplified = false;
+ bool UsedAssumedInformation = false;
+
+ SmallVector<Value *, 8> NewOps(I.getNumOperands());
+ int Idx = 0;
+ for (Value *Op : I.operands()) {
+ const auto &SimplifiedOp = A.getAssumedSimplified(
+ IRPosition::value(*Op, getCallBaseContext()), *this,
+ UsedAssumedInformation, AA::Intraprocedural);
+ // If we are not sure about any operand, we are not sure about the entire
+ // instruction; we'll wait.
+ if (!SimplifiedOp.has_value())
+ return true;
+
+ if (SimplifiedOp.value())
+ NewOps[Idx] = SimplifiedOp.value();
+ else
+ NewOps[Idx] = Op;
+
+ SomeSimplified |= (NewOps[Idx] != Op);
+ ++Idx;
+ }
+
+ // We won't bother with the InstSimplify interface if we didn't simplify any
+ // operand ourselves.
+ if (!SomeSimplified)
+ return false;
+
+ InformationCache &InfoCache = A.getInfoCache();
+ Function *F = I.getFunction();
+ const auto *DT =
+ InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
+ const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+ auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
+ OptimizationRemarkEmitter *ORE = nullptr;
+
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ SimplifyQuery Q(DL, TLI, DT, AC, &I);
+ Value *NewV = simplifyInstructionWithOperands(&I, NewOps, Q, ORE);
+ if (!NewV || NewV == &I)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Generic inst " << I << " assumed simplified to "
+ << *NewV << "\n");
+ Worklist.push_back({{*NewV, II.I.getCtxI()}, II.S});
+ return true;
+ }
+
+ bool simplifyInstruction(
+ Attributor &A, Instruction &I, ItemInfo II,
+ SmallVectorImpl<ItemInfo> &Worklist,
+ SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) {
+ if (auto *CI = dyn_cast<CmpInst>(&I))
+ if (handleCmp(A, *CI, II, Worklist))
+ return true;
+
+ switch (I.getOpcode()) {
+ case Instruction::Select:
+ return handleSelectInst(A, cast<SelectInst>(I), II, Worklist);
+ case Instruction::PHI:
+ return handlePHINode(A, cast<PHINode>(I), II, Worklist, LivenessAAs);
+ case Instruction::Load:
+ return handleLoadInst(A, cast<LoadInst>(I), II, Worklist);
+ default:
+ return handleGenericInst(A, I, II, Worklist);
+ };
+ return false;
+ }
+
+ void genericValueTraversal(Attributor &A) {
+ SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs;
+
+ Value *InitialV = &getAssociatedValue();
+ SmallSet<AA::ValueAndContext, 16> Visited;
+ SmallVector<ItemInfo, 16> Worklist;
+ Worklist.push_back({{*InitialV, getCtxI()}, AA::AnyScope});
+
+ int Iteration = 0;
+ do {
+ ItemInfo II = Worklist.pop_back_val();
+ Value *V = II.I.getValue();
+ assert(V);
+ const Instruction *CtxI = II.I.getCtxI();
+ AA::ValueScope S = II.S;
+
+ // Check if we should process the current value. To prevent endless
+ // recursion, keep a record of the values we followed!
+ if (!Visited.insert(II.I).second)
+ continue;
+
+ // Make sure we limit the compile time for complex expressions.
+ if (Iteration++ >= MaxPotentialValuesIterations) {
+ LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: "
+ << Iteration << "!\n");
+ addValue(A, getState(), *V, CtxI, S, getAnchorScope());
+ continue;
+ }
+
+ // Explicitly look through calls with a "returned" attribute if we do
+ // not have a pointer, as stripPointerCasts only works on pointers.
+ Value *NewV = nullptr;
+ if (V->getType()->isPointerTy()) {
+ NewV = AA::getWithType(*V->stripPointerCasts(), *V->getType());
+ } else {
+ auto *CB = dyn_cast<CallBase>(V);
+ if (CB && CB->getCalledFunction()) {
+ for (Argument &Arg : CB->getCalledFunction()->args())
+ if (Arg.hasReturnedAttr()) {
+ NewV = CB->getArgOperand(Arg.getArgNo());
+ break;
+ }
+ }
+ }
+ if (NewV && NewV != V) {
+ Worklist.push_back({{*NewV, CtxI}, S});
+ continue;
+ }
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (simplifyInstruction(A, *I, II, Worklist, LivenessAAs))
+ continue;
+ }
+
+ if (V != InitialV || isa<Argument>(V))
+ if (recurseForValue(A, IRPosition::value(*V), II.S))
+ continue;
+
+ // If we haven't stripped anything we give up.
+ if (V == InitialV && CtxI == getCtxI()) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
+ addValue(A, getState(), *V, CtxI, S, getAnchorScope());
+ } while (!Worklist.empty());
+
+ // If we actually used liveness information, we have to record a
+ // dependence.
+ for (auto &It : LivenessAAs)
+ if (It.second.AnyDead)
+ A.recordDependence(*It.second.LivenessAA, *this, DepClassTy::OPTIONAL);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesArgument final : AAPotentialValuesImpl {
+ using Base = AAPotentialValuesImpl;
+ AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ auto &Arg = cast<Argument>(getAssociatedValue());
+ if (Arg.hasPointeeInMemoryValueAttr())
+ indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto AssumedBefore = getAssumed();
+
+ unsigned CSArgNo = getCallSiteArgNo();
+
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ const auto CSArgIRP = IRPosition::callsite_argument(ACS, CSArgNo);
+ if (CSArgIRP.getPositionKind() == IRP_INVALID)
+ return false;
+
+ if (!A.getAssumedSimplifiedValues(CSArgIRP, this, Values,
+ AA::Interprocedural,
+ UsedAssumedInformation))
+ return false;
+
+ return isValidState();
+ };
+
+ if (!A.checkForAllCallSites(CallSitePred, *this,
+ /* RequireAllCallSites */ true,
+ UsedAssumedInformation))
+ return indicatePessimisticFixpoint();
+
+ Function *Fn = getAssociatedFunction();
+ bool AnyNonLocal = false;
+ for (auto &It : Values) {
+ if (isa<Constant>(It.getValue())) {
+ addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope,
+ getAnchorScope());
+ continue;
+ }
+ if (!AA::isDynamicallyUnique(A, *this, *It.getValue()))
+ return indicatePessimisticFixpoint();
+
+ if (auto *Arg = dyn_cast<Argument>(It.getValue()))
+ if (Arg->getParent() == Fn) {
+ addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope,
+ getAnchorScope());
+ continue;
+ }
+ addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::Interprocedural,
+ getAnchorScope());
+ AnyNonLocal = true;
+ }
+ if (undefIsContained())
+ unionAssumedWithUndef();
+ if (AnyNonLocal)
+ giveUpOnIntraprocedural(A);
+
+ return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_ARG_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesReturned
+ : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
+ using Base =
+ AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
+ AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(..).
+ void initialize(Attributor &A) override {
+ if (A.hasSimplificationCallback(getIRPosition()))
+ indicatePessimisticFixpoint();
+ else
+ AAPotentialValues::initialize(A);
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ // We queried AAValueSimplify for the returned values so they will be
+ // replaced if a simplified form was found. Nothing to do here.
+ return ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus indicatePessimisticFixpoint() override {
+ return AAPotentialValues::indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesFunction : AAPotentialValuesImpl {
+ AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
+ "not be called");
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
+ AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesFunction(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSiteReturned : AAPotentialValuesImpl {
+ AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto AssumedBefore = getAssumed();
+
+ Function *Callee = getAssociatedFunction();
+ if (!Callee)
+ return indicatePessimisticFixpoint();
+
+ bool UsedAssumedInformation = false;
+ auto *CB = cast<CallBase>(getCtxI());
+ if (CB->isMustTailCall() &&
+ !A.isAssumedDead(IRPosition::inst(*CB), this, nullptr,
+ UsedAssumedInformation))
+ return indicatePessimisticFixpoint();
+
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this,
+ Values, AA::Intraprocedural,
+ UsedAssumedInformation))
+ return indicatePessimisticFixpoint();
+
+ Function *Caller = CB->getCaller();
+
+ bool AnyNonLocal = false;
+ for (auto &It : Values) {
+ Value *V = It.getValue();
+ Optional<Value *> CallerV = A.translateArgumentToCallSiteContent(
+ V, *CB, *this, UsedAssumedInformation);
+ if (!CallerV.has_value()) {
+ // Nothing to do as long as no value was determined.
+ continue;
+ }
+ V = CallerV.value() ? CallerV.value() : V;
+ if (AA::isDynamicallyUnique(A, *this, *V) &&
+ AA::isValidInScope(*V, Caller)) {
+ if (CallerV.value()) {
+ SmallVector<AA::ValueAndContext> ArgValues;
+ IRPosition IRP = IRPosition::value(*V);
+ if (auto *Arg = dyn_cast<Argument>(V))
+ if (Arg->getParent() == CB->getCalledFunction())
+ IRP = IRPosition::callsite_argument(*CB, Arg->getArgNo());
+ if (recurseForValue(A, IRP, AA::AnyScope))
+ continue;
+ }
+ addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope());
+ } else {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+ if (AnyNonLocal) {
+ Values.clear();
+ if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this,
+ Values, AA::Interprocedural,
+ UsedAssumedInformation))
+ return indicatePessimisticFixpoint();
+ AnyNonLocal = false;
+ getState() = PotentialLLVMValuesState::getBestState();
+ for (auto &It : Values) {
+ Value *V = It.getValue();
+ if (!AA::isDynamicallyUnique(A, *this, *V))
+ return indicatePessimisticFixpoint();
+ if (AA::isValidInScope(*V, Caller)) {
+ addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope());
+ } else {
+ AnyNonLocal = true;
+ addValue(A, getState(), *V, CB, AA::Interprocedural,
+ getAnchorScope());
+ }
+ }
+ if (AnyNonLocal)
+ giveUpOnIntraprocedural(A);
+ }
+ return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ ChangeStatus indicatePessimisticFixpoint() override {
+ return AAPotentialValues::indicatePessimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(potential_values)
+ }
+};
+
+struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
+ AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAPotentialValuesFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(potential_values)
+ }
+};
+} // namespace
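// Informal usage sketch (not part of the patch): a client pass would seed the
// new attribute like any other value-position attribute, e.g.
//   A.getOrCreateAAFor<AAPotentialValues>(IRPosition::value(V));
// and afterwards read the candidate set via getAssumedSimplifiedValues() or
// fold it into one value with AAPotentialValues::getSingleValue().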
+
/// ---------------------- Assumption Propagation ------------------------------
namespace {
struct AAAssumptionInfoImpl : public AAAssumptionInfo {
@@ -10323,6 +10749,7 @@ const char AAMemoryBehavior::ID = 0;
const char AAMemoryLocation::ID = 0;
const char AAValueConstantRange::ID = 0;
const char AAPotentialConstantValues::ID = 0;
+const char AAPotentialValues::ID = 0;
const char AANoUndef::ID = 0;
const char AACallEdges::ID = 0;
const char AAFunctionReachability::ID = 0;
@@ -10441,6 +10868,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 56e2df14ff38..360ec24a0509 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -1147,6 +1147,14 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// Declare a callback for the internalize pass that will ask for every
// candidate GlobalValue if it can be internalized or not.
auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
+ // It may be the case that GV is on a chain of an ifunc, its alias and
+ // subsequent aliases. In this case, the summary for the value is not
+ // available.
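    // A hypothetical shape of such a chain (illustration only, not from the
    // patch):
    //   @impl       = ifunc void (), ptr @impl_resolver
    //   @impl_alias = alias void (), ptr @impl
    // Per the note above, neither value gets a summary entry, so both must be
    // treated as preserved here.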
+ if (isa<GlobalIFunc>(&GV) ||
+ (isa<GlobalAlias>(&GV) &&
+ isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
+ return true;
+
// Lookup the linkage recorded in the summaries during global analysis.
auto GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end()) {
@@ -1277,7 +1285,7 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
for (GlobalAlias &GA : SrcModule->aliases()) {
- if (!GA.hasName())
+ if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
continue;
auto GUID = GA.getGUID();
auto Import = ImportGUIDs.count(GUID);
@@ -1413,29 +1421,6 @@ static bool doImportingForModule(Module &M) {
return *Result;
}
-namespace {
-
-/// Pass that performs cross-module function import provided a summary file.
-class FunctionImportLegacyPass : public ModulePass {
-public:
- /// Pass identification, replacement for typeid
- static char ID;
-
- explicit FunctionImportLegacyPass() : ModulePass(ID) {}
-
- /// Specify pass name for debug output
- StringRef getPassName() const override { return "Function Importing"; }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- return doImportingForModule(M);
- }
-};
-
-} // end anonymous namespace
-
PreservedAnalyses FunctionImportPass::run(Module &M,
ModuleAnalysisManager &AM) {
if (!doImportingForModule(M))
@@ -1443,15 +1428,3 @@ PreservedAnalyses FunctionImportPass::run(Module &M,
return PreservedAnalyses::none();
}
-
-char FunctionImportLegacyPass::ID = 0;
-INITIALIZE_PASS(FunctionImportLegacyPass, "function-import",
- "Summary Based Function Import", false, false)
-
-namespace llvm {
-
-Pass *createFunctionImportPass() {
- return new FunctionImportLegacyPass();
-}
-
-} // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1ad6e2b2a1d2..ec26db8bfc0b 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1040,7 +1040,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
CallInst *CI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
- if (!isAllocRemovable(CI, TLI))
+ if (!isRemovableAlloc(CI, TLI))
// Must be able to remove the call when we get done.
return false;
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index ec2b80012ed6..dfd434e61d5b 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -44,7 +44,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLoopExtractorLegacyPassPass(Registry);
initializeBlockExtractorLegacyPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
- initializeLowerTypeTestsPass(Registry);
initializeMergeFunctionsLegacyPassPass(Registry);
initializePartialInlinerLegacyPassPass(Registry);
initializeAttributorLegacyPassPass(Registry);
@@ -60,9 +59,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeStripNonDebugSymbolsPass(Registry);
initializeBarrierNoopPass(Registry);
initializeEliminateAvailableExternallyLegacyPassPass(Registry);
- initializeSampleProfileLoaderLegacyPassPass(Registry);
- initializeFunctionImportLegacyPassPass(Registry);
- initializeWholeProgramDevirtPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp
index 5aa5b905f06c..85b1a8303d33 100644
--- a/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -28,6 +28,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@@ -40,13 +41,13 @@ STATISTIC(NumAliases, "Number of aliases internalized");
STATISTIC(NumFunctions, "Number of functions internalized");
STATISTIC(NumGlobals, "Number of global vars internalized");
-// APIFile - A file which contains a list of symbols that should not be marked
-// external.
+// APIFile - A file which contains a list of symbol glob patterns that should
+// not be marked external.
static cl::opt<std::string>
APIFile("internalize-public-api-file", cl::value_desc("filename"),
cl::desc("A file containing list of symbol names to preserve"));
-// APIList - A list of symbols that should not be marked internal.
+// APIList - A list of symbol glob patterns that should not be marked internal.
static cl::list<std::string>
APIList("internalize-public-api-list", cl::value_desc("list"),
cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
@@ -59,29 +60,44 @@ public:
PreserveAPIList() {
if (!APIFile.empty())
LoadFile(APIFile);
- ExternalNames.insert(APIList.begin(), APIList.end());
+ for (StringRef Pattern : APIList)
+ addGlob(Pattern);
}
bool operator()(const GlobalValue &GV) {
- return ExternalNames.count(GV.getName());
+ return llvm::any_of(
+ ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); });
}
private:
// Contains the set of symbols loaded from file
- StringSet<> ExternalNames;
+ SmallVector<GlobPattern> ExternalNames;
+
+ void addGlob(StringRef Pattern) {
+ auto GlobOrErr = GlobPattern::create(Pattern);
+ if (!GlobOrErr) {
+ errs() << "WARNING: when loading pattern: '"
+ << toString(GlobOrErr.takeError()) << "' ignoring";
+ return;
+ }
+ ExternalNames.emplace_back(std::move(*GlobOrErr));
+ }
void LoadFile(StringRef Filename) {
// Load the APIFile...
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
MemoryBuffer::getFile(Filename);
- if (!Buf) {
+ if (!BufOrErr) {
errs() << "WARNING: Internalize couldn't load file '" << Filename
<< "'! Continuing as if it's empty.\n";
return; // Just continue as if the file were empty
}
- for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
- ExternalNames.insert(*I);
+ Buf = std::move(*BufOrErr);
+ for (line_iterator I(*Buf, true), E; I != E; ++I)
+ addGlob(*I);
}
+
+ std::shared_ptr<MemoryBuffer> Buf;
};
} // end anonymous namespace
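A short sketch of the new glob matching (illustrative only; the pattern and symbol names are made up, the GlobPattern API is the one included above):

  // -internalize-public-api-list='main,llvm.*' now keeps every symbol whose
  // name matches one of the globs instead of requiring exact names.
  if (Expected<GlobPattern> Pat = GlobPattern::create("llvm.*")) {
    bool Keep = Pat->match("llvm.global_ctors"); // true -> not internalized
    (void)Keep;
  } else
    consumeError(Pat.takeError());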
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index d5f1d291f41f..6bf25df101fa 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -528,50 +528,8 @@ public:
// arguments. For testing purposes only.
static bool runForTesting(Module &M);
};
-
-struct LowerTypeTests : public ModulePass {
- static char ID;
-
- bool UseCommandLine = false;
-
- ModuleSummaryIndex *ExportSummary;
- const ModuleSummaryIndex *ImportSummary;
- bool DropTypeTests;
-
- LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
- initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
- }
-
- LowerTypeTests(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
- : ModulePass(ID), ExportSummary(ExportSummary),
- ImportSummary(ImportSummary),
- DropTypeTests(DropTypeTests || ClDropTypeTests) {
- initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (UseCommandLine)
- return LowerTypeTestsModule::runForTesting(M);
- return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
- .lower();
- }
-};
-
} // end anonymous namespace
-char LowerTypeTests::ID = 0;
-
-INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
- false)
-
-ModulePass *
-llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary,
- bool DropTypeTests) {
- return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests);
-}
-
/// Build a bit set for TypeId using the object layouts in
/// GlobalLayout.
BitSetInfo LowerTypeTestsModule::buildBitSet(
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 8e0ca8c6c997..0b42fc151991 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -4808,7 +4808,7 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
bool UsedAssumedInformation = false;
A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
- UsedAssumedInformation);
+ UsedAssumedInformation, AA::Interprocedural);
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
}
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8eef82675e86..f1b6f2bb7de4 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -91,14 +91,6 @@ cl::opt<bool> EnableDFAJumpThreading("enable-dfa-jump-thread",
cl::desc("Enable DFA jump threading."),
cl::init(false), cl::Hidden);
-static cl::opt<bool>
- EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
- cl::desc("Enable preparation for ThinLTO."));
-
-static cl::opt<bool>
- EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
- cl::desc("Enable performing ThinLTO."));
-
cl::opt<bool> EnableHotColdSplit("hot-cold-split",
cl::desc("Enable hot-cold splitting pass"));
@@ -192,15 +184,6 @@ PassManagerBuilder::PassManagerBuilder() {
VerifyInput = false;
VerifyOutput = false;
MergeFunctions = false;
- PrepareForLTO = false;
- EnablePGOInstrGen = false;
- EnablePGOCSInstrGen = false;
- EnablePGOCSInstrUse = false;
- PGOInstrGen = "";
- PGOInstrUse = "";
- PGOSampleUse = "";
- PrepareForThinLTO = EnablePrepareForThinLTO;
- PerformThinLTO = EnablePerformThinLTO;
DivergentTarget = false;
CallGraphProfile = true;
}
@@ -390,7 +373,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/false));
// Rotate Loop - disable header duplication at -Oz
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false));
// TODO: Investigate promotion cap for O1.
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
@@ -470,10 +453,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Clean up after everything.
MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
-
- if (EnableCHR && OptLevel >= 3 &&
- (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen))
- MPM.add(createControlHeightReductionLegacyPass());
}
/// FIXME: Should LTO cause any differences to this set of passes?
@@ -598,15 +577,6 @@ void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
MPM.add(createAnnotation2MetadataLegacyPass());
- if (!PGOSampleUse.empty()) {
- MPM.add(createPruneEHPass());
- // In ThinLTO mode, when flattened profile is used, all the available
- // profile information will be annotated in PreLink phase so there is
- // no need to load the profile again in PostLink.
- if (!(FlattenedProfileUsed && PerformThinLTO))
- MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
- }
-
// Allow forcing function attributes as a debugging and tuning aid.
MPM.add(createForceFunctionAttrsLegacyPass());
@@ -628,26 +598,8 @@ void PassManagerBuilder::populateModulePassManager(
else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
- if (PerformThinLTO) {
- MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
- // Drop available_externally and unreferenced globals. This is necessary
- // with ThinLTO in order to avoid leaving undefined references to dead
- // globals in the object file.
- MPM.add(createEliminateAvailableExternallyPass());
- MPM.add(createGlobalDCEPass());
- }
-
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
- if (PrepareForLTO || PrepareForThinLTO) {
- MPM.add(createCanonicalizeAliasesPass());
- // Rename anon globals to be able to export them in the summary.
- // This has to be done after we add the extensions to the pass manager
- // as there could be passes (e.g. Adddress sanitizer) which introduce
- // new unnamed globals.
- MPM.add(createNameAnonGlobalPass());
- }
-
MPM.add(createAnnotationRemarksLegacyPass());
return;
}
@@ -658,25 +610,6 @@ void PassManagerBuilder::populateModulePassManager(
addInitialAliasAnalysisPasses(MPM);
- // For ThinLTO there are two passes of indirect call promotion. The
- // first is during the compile phase when PerformThinLTO=false and
- // intra-module indirect call targets are promoted. The second is during
- // the ThinLTO backend when PerformThinLTO=true, when we promote imported
- // inter-module indirect calls. For that we perform indirect call promotion
- // earlier in the pass pipeline, here before globalopt. Otherwise imported
- // available_externally functions look unreferenced and are removed.
- if (PerformThinLTO) {
- MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
- }
-
- // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops
- // as it will change the CFG too much to make the 2nd profile annotation
- // in backend more difficult.
- bool PrepareForThinLTOUsingPGOSampleProfile =
- PrepareForThinLTO && !PGOSampleUse.empty();
- if (PrepareForThinLTOUsingPGOSampleProfile)
- DisableUnrollLoops = true;
-
// Infer attributes about declarations if possible.
MPM.add(createInferFunctionAttrsLegacyPass());
@@ -744,7 +677,7 @@ void PassManagerBuilder::populateModulePassManager(
if (RunPartialInlining)
MPM.add(createPartialInliningPass());
- if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO)
+ if (OptLevel > 1)
// Remove avail extern fns and globals definitions if we aren't
// compiling an object file for later LTO. For LTO we want to preserve
// these so they are eligible for inlining at link-time. Note if they
@@ -756,9 +689,6 @@ void PassManagerBuilder::populateModulePassManager(
// and saves running remaining passes on the eliminated functions.
MPM.add(createEliminateAvailableExternallyPass());
- if (EnableOrderFileInstrumentation)
- MPM.add(createInstrOrderFilePass());
-
MPM.add(createReversePostOrderFunctionAttrsPass());
// The inliner performs some kind of dead code elimination as it goes,
@@ -772,24 +702,6 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createGlobalDCEPass());
}
- // If we are planning to perform ThinLTO later, let's not bloat the code with
- // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
- // during ThinLTO and perform the rest of the optimizations afterward.
- if (PrepareForThinLTO) {
- // Ensure we perform any last passes, but do so before renaming anonymous
- // globals in case the passes add any.
- addExtensionsToPM(EP_OptimizerLast, MPM);
- MPM.add(createCanonicalizeAliasesPass());
- // Rename anon globals to be able to export them in the summary.
- MPM.add(createNameAnonGlobalPass());
- return;
- }
-
- if (PerformThinLTO)
- // Optimize globals now when performing ThinLTO, this enables more
- // optimizations later.
- MPM.add(createGlobalOptimizerPass());
-
// Scheduling LoopVersioningLICM when inlining is over, because after that
// we may see more accurate aliasing. Reason to run this late is that too
// early versioning may prevent further inlining due to increase of code
@@ -834,7 +746,7 @@ void PassManagerBuilder::populateModulePassManager(
// Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form. Disable header duplication at -Oz.
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@@ -856,7 +768,7 @@ void PassManagerBuilder::populateModulePassManager(
// See comment in the new PM for justification of scheduling splitting at
// this stage (\ref buildModuleSimplificationPipeline).
- if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
+ if (EnableHotColdSplit)
MPM.add(createHotColdSplittingPass());
if (EnableIROutliner)
@@ -865,10 +777,6 @@ void PassManagerBuilder::populateModulePassManager(
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
- // Add Module flag "CG Profile" based on Branch Frequency Information.
- if (CallGraphProfile)
- MPM.add(createCGProfileLegacyPass());
-
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
@@ -889,12 +797,6 @@ void PassManagerBuilder::populateModulePassManager(
addExtensionsToPM(EP_OptimizerLast, MPM);
- if (PrepareForLTO) {
- MPM.add(createCanonicalizeAliasesPass());
- // Rename anon globals to be able to handle them in the summary
- MPM.add(createNameAnonGlobalPass());
- }
-
MPM.add(createAnnotationRemarksLegacyPass());
}
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 55fee213cd5f..f76b886e810a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -546,53 +546,6 @@ private:
return AnnotatedPassName.c_str();
}
};
-
-class SampleProfileLoaderLegacyPass : public ModulePass {
-public:
- // Class identification, replacement for typeinfo
- static char ID;
-
- SampleProfileLoaderLegacyPass(
- StringRef Name = SampleProfileFile,
- ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
- : ModulePass(ID), SampleLoader(
- Name, SampleProfileRemappingFile, LTOPhase,
- [&](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- },
- [&](Function &F) -> TargetTransformInfo & {
- return TTIWP->getTTI(F);
- },
- [&](Function &F) -> TargetLibraryInfo & {
- return TLIWP->getTLI(F);
- }) {
- initializeSampleProfileLoaderLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- void dump() { SampleLoader.dump(); }
-
- bool doInitialization(Module &M) override {
- return SampleLoader.doInitialization(M);
- }
-
- StringRef getPassName() const override { return "Sample profile pass"; }
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- }
-
-private:
- SampleProfileLoader SampleLoader;
- AssumptionCacheTracker *ACT = nullptr;
- TargetTransformInfoWrapperPass *TTIWP = nullptr;
- TargetLibraryInfoWrapperPass *TLIWP = nullptr;
-};
-
} // end anonymous namespace
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
@@ -734,8 +687,8 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
assert(L && R && "Expect non-null FunctionSamples");
- if (L->getEntrySamples() != R->getEntrySamples())
- return L->getEntrySamples() > R->getEntrySamples();
+ if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
+ return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
return FunctionSamples::getGUID(L->getName()) <
FunctionSamples::getGUID(R->getName());
};
@@ -750,7 +703,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
// as that already includes both inlined callee and non-inlined ones.
Sum = 0;
for (const auto *const FS : CalleeSamples) {
- Sum += FS->getEntrySamples();
+ Sum += FS->getHeadSamplesEstimate();
R.push_back(FS);
}
llvm::sort(R, FSCompare);
@@ -771,7 +724,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
if (M->empty())
return R;
for (const auto &NameFS : *M) {
- Sum += NameFS.second.getEntrySamples();
+ Sum += NameFS.second.getHeadSamplesEstimate();
R.push_back(&NameFS.second);
}
llvm::sort(R, FSCompare);
@@ -1090,7 +1043,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
bool PreInline =
UsePreInlinerDecision &&
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
- if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
+ if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
StringRef Name = CalleeSample->getFuncName();
@@ -1171,7 +1124,8 @@ bool SampleProfileLoader::inlineHotFunctions(
assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
"GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS)
+ if (FS->getHeadSamplesEstimate() > 0 ||
+ FunctionSamples::ProfileIsCS)
LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
Hot = true;
@@ -1211,7 +1165,7 @@ bool SampleProfileLoader::inlineHotFunctions(
if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
continue;
- Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+ Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
@@ -1325,7 +1279,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
Factor = Probe->Factor;
uint64_t CallsiteCount =
- CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0;
+ CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
*NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
return true;
}
@@ -1481,7 +1435,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
continue;
}
uint64_t EntryCountDistributed =
- FS->getEntrySamples() * Candidate.CallsiteDistribution;
+ FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
// In addition to regular inline cost check, we also need to make sure
// ICP isn't introducing excessive speculative checks even if individual
// target looks beneficial to promote and inline. That means we should
@@ -1568,7 +1522,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
++NumCSNotInlined;
const FunctionSamples *FS = Pair.getSecond();
- if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) {
+ if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
continue;
}
@@ -1586,7 +1540,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
// Use entry samples as head samples during the merge, as inlinees
// don't have head samples.
const_cast<FunctionSamples *>(FS)->addHeadSamples(
- FS->getEntrySamples());
+ FS->getHeadSamplesEstimate());
// Note that we have to do the merge right after processing function.
// This allows OutlineFS's profile to be used for annotation during
@@ -1599,7 +1553,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
} else {
auto pair =
notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
- pair.first->second.entryCount += FS->getEntrySamples();
+ pair.first->second.entryCount += FS->getHeadSamplesEstimate();
}
}
}
@@ -1663,7 +1617,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
if (const FunctionSamplesMap *M =
FS->findFunctionSamplesMapAt(CallSite)) {
for (const auto &NameFS : *M)
- Sum += NameFS.second.getEntrySamples();
+ Sum += NameFS.second.getHeadSamplesEstimate();
}
}
if (Sum)
@@ -1825,17 +1779,6 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
return Changed;
}
-char SampleProfileLoaderLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
- "Sample Profile loader", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
- "Sample Profile loader", false, false)
-
std::unique_ptr<ProfiledCallGraph>
SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG;
@@ -2073,14 +2016,6 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-ModulePass *llvm::createSampleProfileLoaderPass() {
- return new SampleProfileLoaderLegacyPass();
-}
-
-ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
- return new SampleProfileLoaderLegacyPass(Name);
-}
-
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG) {
GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
@@ -2141,15 +2076,6 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
return retval;
}
-bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
- ACT = &getAnalysis<AssumptionCacheTracker>();
- TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
- TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>();
- ProfileSummaryInfo *PSI =
- &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- return SampleLoader.runOnModule(M, nullptr, PSI, nullptr);
-}
-
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
DILocation2SampleMap.clear();
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 898a213d0849..ad00c116ce0a 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -747,78 +747,8 @@ struct DevirtIndex {
void run();
};
-
-struct WholeProgramDevirt : public ModulePass {
- static char ID;
-
- bool UseCommandLine = false;
-
- ModuleSummaryIndex *ExportSummary = nullptr;
- const ModuleSummaryIndex *ImportSummary = nullptr;
-
- WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
- initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
- }
-
- WholeProgramDevirt(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary)
- : ModulePass(ID), ExportSummary(ExportSummary),
- ImportSummary(ImportSummary) {
- initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- // In the new pass manager, we can request the optimization
- // remark emitter pass on a per-function-basis, which the
- // OREGetter will do for us.
- // In the old pass manager, this is harder, so we just build
- // an optimization remark emitter on the fly, when we need it.
- std::unique_ptr<OptimizationRemarkEmitter> ORE;
- auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
- ORE = std::make_unique<OptimizationRemarkEmitter>(F);
- return *ORE;
- };
-
- auto LookupDomTree = [this](Function &F) -> DominatorTree & {
- return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
- };
-
- if (UseCommandLine)
- return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter,
- LookupDomTree);
-
- return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree,
- ExportSummary, ImportSummary)
- .run();
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- }
-};
-
} // end anonymous namespace
-INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
- "Whole program devirtualization", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
- "Whole program devirtualization", false, false)
-char WholeProgramDevirt::ID = 0;
-
-ModulePass *
-llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary) {
- return new WholeProgramDevirt(ExportSummary, ImportSummary);
-}
-
PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 535a7736454c..4a459ec6c550 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1966,12 +1966,14 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
}
- // If there's no chance any bit will need to borrow from an adjacent bit:
- // sub C, X --> xor X, C
const APInt *Op0C;
- if (match(Op0, m_APInt(Op0C)) &&
- (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C))
- return BinaryOperator::CreateXor(Op1, Op0);
+ if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) {
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
+ KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
+ if ((*Op0C | RHSKnown.Zero).isAllOnes())
+ return BinaryOperator::CreateXor(Op1, Op0);
+ }
{
Value *Y;
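
The new guard only fires when the LHS constant is a low-bit mask and every other bit of the RHS is known zero, which is exactly the condition under which the subtraction cannot borrow. A standalone sketch (not part of the patch) brute-forcing that identity for 8-bit values:

    // If C is a mask (2^n - 1) and X's possible set bits all lie inside the
    // mask, no borrow can occur, so C - X == C ^ X.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned n = 0; n <= 8; ++n) {
        const uint8_t C = static_cast<uint8_t>((1u << n) - 1); // mask 2^n - 1
        for (unsigned X = 0; X <= C; ++X) // X's bits stay within the mask
          assert(static_cast<uint8_t>(C - X) == static_cast<uint8_t>(C ^ X));
      }
      return 0;
    }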
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a8f2cd79830a..8253c575bc37 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2664,8 +2664,8 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// Inverted form (example):
// (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0)
bool TrueIfSignedL, TrueIfSignedR;
- if (InstCombiner::isSignBitCheck(PredL, *LHSC, TrueIfSignedL) &&
- InstCombiner::isSignBitCheck(PredR, *RHSC, TrueIfSignedR) &&
+ if (isSignBitCheck(PredL, *LHSC, TrueIfSignedL) &&
+ isSignBitCheck(PredR, *RHSC, TrueIfSignedR) &&
(RHS->hasOneUse() || LHS->hasOneUse())) {
Value *X, *Y;
if (IsAnd) {
@@ -3202,25 +3202,38 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// TODO: This can be generalized to compares of non-signbits using
// decomposeBitTestICmp(). It could be enhanced more by using (something like)
// foldLogOpOfMaskedICmps().
- if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
+ const APInt *LC, *RC;
+ if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) &&
LHS0->getType() == RHS0->getType() &&
- LHS0->getType()->isIntOrIntVectorTy()) {
+ LHS0->getType()->isIntOrIntVectorTy() &&
+ (LHS->hasOneUse() || RHS->hasOneUse())) {
+ // Convert xor of signbit tests to signbit test of xor'd values:
// (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
// (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
- if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
- PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) ||
- (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
- PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())))
- return Builder.CreateIsNeg(Builder.CreateXor(LHS0, RHS0));
-
// (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
// (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
- if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) &&
- PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) ||
- (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) &&
- PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())))
- return Builder.CreateIsNotNeg(Builder.CreateXor(LHS0, RHS0));
-
+ bool TrueIfSignedL, TrueIfSignedR;
+ if (isSignBitCheck(PredL, *LC, TrueIfSignedL) &&
+ isSignBitCheck(PredR, *RC, TrueIfSignedR)) {
+ Value *XorLR = Builder.CreateXor(LHS0, RHS0);
+ return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR) :
+ Builder.CreateIsNotNeg(XorLR);
+ }
+
+ // (X > C) ^ (X < C + 2) --> X != C + 1
+ // (X < C + 2) ^ (X > C) --> X != C + 1
+ // For correctness, this must not fire when C is non-negative and C + 2 is
+ // negative (i.e. C + 2 wraps); that case is left to other patterns.
+ const APInt *C1, *C2;
+ if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_APInt(C1)) &&
+ PredR == CmpInst::ICMP_SLT && match(RHS1, m_APInt(C2))) ||
+ (PredL == CmpInst::ICMP_SLT && match(LHS1, m_APInt(C2)) &&
+ PredR == CmpInst::ICMP_SGT && match(RHS1, m_APInt(C1))))
+ if (LHS0 == RHS0 && *C1 + 2 == *C2 &&
+ (C1->isNegative() || C2->isNonNegative()))
+ return Builder.CreateICmpNE(LHS0,
+ ConstantInt::get(LHS0->getType(), *C1 + 1));
}
// Instead of trying to imitate the folds for and/or, decompose this 'xor'
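
Both new folds are easy to confirm exhaustively at a small bit width. The sketch below (illustrative only, not part of the patch) checks the sign-bit xor identity and the (X > C) ^ (X < C + 2) --> X != C + 1 fold under the same non-wrapping guard used above:

    #include <cassert>
    #include <cstdint>

    int main() {
      // (X < 0) ^ (Y < 0)  -->  (X ^ Y) < 0
      for (int a = -128; a <= 127; ++a)
        for (int b = -128; b <= 127; ++b) {
          bool LHS = (a < 0) ^ (b < 0);
          bool RHS = static_cast<int8_t>(static_cast<uint8_t>(a) ^
                                         static_cast<uint8_t>(b)) < 0;
          assert(LHS == RHS);
        }

      // (X > C) ^ (X < C + 2)  -->  X != C + 1
      for (int c = -128; c <= 127; ++c) {
        int8_t C1 = static_cast<int8_t>(c);
        int8_t C2 = static_cast<int8_t>(static_cast<uint8_t>(C1) + 2);
        // Same guard as the code: skip when C is non-negative but C + 2 wrapped.
        if (!(C1 < 0 || C2 >= 0))
          continue;
        int8_t Mid = static_cast<int8_t>(static_cast<uint8_t>(C1) + 1);
        for (int x = -128; x <= 127; ++x) {
          int8_t X = static_cast<int8_t>(x);
          assert(((X > C1) ^ (X < C2)) == (X != Mid));
        }
      }
      return 0;
    }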
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index edfdf70c2b97..bc01d2ef7fe2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1140,8 +1140,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
- if (isFreeCall(&CI, &TLI))
- return visitFree(CI);
+ if (Value *FreedOp = getFreedOperand(&CI, &TLI))
+ return visitFree(CI, FreedOp);
// If the caller function (i.e. us, the function that contains this CallInst)
// is nounwind, mark the call as nounwind, even if the callee isn't.
@@ -1539,8 +1539,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Type *Ty = II->getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Constant *ShAmtC;
- if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) &&
- !ShAmtC->containsConstantExpression()) {
+ if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
// Canonicalize a shift amount constant operand to modulo the bit-width.
Constant *WidthC = ConstantInt::get(Ty, BitWidth);
Constant *ModuloC =
@@ -2885,21 +2884,21 @@ bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
// of the respective allocator declaration with generic attributes.
bool Changed = false;
- if (isAllocationFn(&Call, TLI)) {
- uint64_t Size;
- ObjectSizeOpts Opts;
- if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
- // TODO: We really should just emit deref_or_null here and then
- // let the generic inference code combine that with nonnull.
- if (Call.hasRetAttr(Attribute::NonNull)) {
- Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
- Call.addRetAttr(
- Attribute::getWithDereferenceableBytes(Call.getContext(), Size));
- } else {
- Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
- Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Size));
- }
+ if (!Call.getType()->isPointerTy())
+ return Changed;
+
+ Optional<APInt> Size = getAllocSize(&Call, TLI);
+ if (Size && *Size != 0) {
+ // TODO: We really should just emit deref_or_null here and then
+ // let the generic inference code combine that with nonnull.
+ if (Call.hasRetAttr(Attribute::NonNull)) {
+ Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
+ Call.addRetAttr(Attribute::getWithDereferenceableBytes(
+ Call.getContext(), Size->getLimitedValue()));
+ } else {
+ Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Size->getLimitedValue()));
}
}
@@ -3079,8 +3078,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
}
- if (isAllocationFn(&Call, &TLI) &&
- isAllocRemovable(&cast<CallBase>(Call), &TLI))
+ if (isRemovableAlloc(&Call, &TLI))
return visitAllocSite(Call);
// Handle intrinsics which can be used in both call and invoke context.
@@ -3242,15 +3240,16 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// the call because there is no place to put the cast instruction (without
// breaking the critical edge). Bail out in this case.
if (!Caller->use_empty()) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
- for (User *U : II->users())
+ BasicBlock *PhisNotSupportedBlock = nullptr;
+ if (auto *II = dyn_cast<InvokeInst>(Caller))
+ PhisNotSupportedBlock = II->getNormalDest();
+ if (auto *CB = dyn_cast<CallBrInst>(Caller))
+ PhisNotSupportedBlock = CB->getDefaultDest();
+ if (PhisNotSupportedBlock)
+ for (User *U : Caller->users())
if (PHINode *PN = dyn_cast<PHINode>(U))
- if (PN->getParent() == II->getNormalDest() ||
- PN->getParent() == II->getUnwindDest())
+ if (PN->getParent() == PhisNotSupportedBlock)
return false;
- // FIXME: Be conservative for callbr to avoid a quadratic search.
- if (isa<CallBrInst>(Caller))
- return false;
}
}
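
The rewritten annotation logic boils down to a small decision: a known non-zero allocation size becomes dereferenceable(N) when the return is already nonnull and dereferenceable_or_null(N) otherwise. A minimal sketch of that choice (plain C++, not the LLVM attribute API):

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <string>

    std::string pickRetAttr(bool ReturnsPointer, std::optional<uint64_t> AllocSize,
                            bool HasNonNull) {
      if (!ReturnsPointer || !AllocSize || *AllocSize == 0)
        return ""; // nothing to annotate
      return (HasNonNull ? "dereferenceable(" : "dereferenceable_or_null(") +
             std::to_string(*AllocSize) + ")";
    }

    int main() {
      assert(pickRetAttr(true, 32, true) == "dereferenceable(32)");
      assert(pickRetAttr(true, 32, false) == "dereferenceable_or_null(32)");
      assert(pickRetAttr(true, 0, true).empty());   // zero-size: no annotation
      assert(pickRetAttr(false, 32, true).empty()); // non-pointer return
      return 0;
    }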
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9f6d36b85522..158d2e8289e0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2002,9 +2002,12 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
Constant::getNullValue(Mul->getType()));
}
+ if (MulC->isZero() || !(Mul->hasNoSignedWrap() || Mul->hasNoUnsignedWrap()))
+ return nullptr;
+
// If the multiply does not wrap, try to divide the compare constant by the
// multiplication factor.
- if (Cmp.isEquality() && !MulC->isZero()) {
+ if (Cmp.isEquality()) {
// (mul nsw X, MulC) == C --> X == C /s MulC
if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC));
@@ -2017,7 +2020,40 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
}
}
- return nullptr;
+ Constant *NewC = nullptr;
+
+ // FIXME: Add assert that Pred is not equal to ICMP_SGE, ICMP_SLE,
+ // ICMP_UGE, ICMP_ULE.
+
+ if (Mul->hasNoSignedWrap()) {
+ if (MulC->isNegative()) {
+ // MININT / -1 --> overflow.
+ if (C.isMinSignedValue() && MulC->isAllOnes())
+ return nullptr;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE)
+ NewC = ConstantInt::get(
+ Mul->getType(),
+ APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP));
+ if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT)
+ NewC = ConstantInt::get(
+ Mul->getType(),
+ APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN));
+ }
+
+ if (Mul->hasNoUnsignedWrap()) {
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)
+ NewC = ConstantInt::get(
+ Mul->getType(),
+ APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP));
+ if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)
+ NewC = ConstantInt::get(
+ Mul->getType(),
+ APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN));
+ }
+
+ return NewC ? new ICmpInst(Pred, Mul->getOperand(0), NewC) : nullptr;
}
/// Fold icmp (shl 1, Y), C.
@@ -2235,13 +2271,22 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
bool IsAShr = Shr->getOpcode() == Instruction::AShr;
const APInt *ShiftValC;
- if (match(Shr->getOperand(0), m_APInt(ShiftValC))) {
+ if (match(X, m_APInt(ShiftValC))) {
if (Cmp.isEquality())
return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC);
+ // (ShiftValC >> Y) >s -1 --> Y != 0 with ShiftValC < 0
+ // (ShiftValC >> Y) <s 0 --> Y == 0 with ShiftValC < 0
+ bool TrueIfSigned;
+ if (!IsAShr && ShiftValC->isNegative() &&
+ isSignBitCheck(Pred, C, TrueIfSigned))
+ return new ICmpInst(TrueIfSigned ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE,
+ Shr->getOperand(1),
+ ConstantInt::getNullValue(X->getType()));
+
// If the shifted constant is a power-of-2, test the shift amount directly:
- // (ShiftValC >> X) >u C --> X <u (LZ(C) - LZ(ShiftValC))
- // (ShiftValC >> X) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC))
+ // (ShiftValC >> Y) >u C --> Y <u (LZ(C) - LZ(ShiftValC))
+ // (ShiftValC >> Y) <u C --> Y >=u (LZ(C-1) - LZ(ShiftValC))
if (!IsAShr && ShiftValC->isPowerOf2() &&
(Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) {
bool IsUGT = Pred == CmpInst::ICMP_UGT;
@@ -2972,7 +3017,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
const APInt *C;
bool TrueIfSigned;
if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() &&
- InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) {
+ isSignBitCheck(Pred, *C, TrueIfSigned)) {
if (match(BCSrcOp, m_FPExt(m_Value(X))) ||
match(BCSrcOp, m_FPTrunc(m_Value(X)))) {
// (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0
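
The new non-equality handling rests on the usual rounding-division identities: with no signed wrap, X * M <s C is equivalent to X <s ceil(C / M) for positive M, and to X >s floor(C / M) for negative M once the predicate is swapped. A brute-force sanity check of those two cases (a sketch, not part of the patch):

    #include <cassert>

    static int floordiv(int a, int b) {
      int q = a / b, r = a % b;
      return (r != 0 && (r < 0) != (b < 0)) ? q - 1 : q;
    }
    static int ceildiv(int a, int b) {
      int q = a / b, r = a % b;
      return (r != 0 && (r < 0) == (b < 0)) ? q + 1 : q;
    }

    int main() {
      const int Ms[] = {3, -3};
      for (int M : Ms)
        for (int C = -128; C <= 127; ++C)
          for (int X = -128; X <= 127; ++X) {
            if (X * M < -128 || X * M > 127)
              continue; // outside the nsw precondition
            bool Orig = X * M < C;
            bool Folded = M > 0 ? X < ceildiv(C, M) : X > floordiv(C, M);
            assert(Orig == Folded);
          }
      return 0;
    }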
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 271154bb3f5a..827b25533513 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -152,7 +152,7 @@ public:
Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
Instruction *visitAllocSite(Instruction &FI);
- Instruction *visitFree(CallInst &FI);
+ Instruction *visitFree(CallInst &FI, Value *FreedOp);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitAtomicRMWInst(AtomicRMWInst &SI);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index f4e2d1239f0f..13c98b935adf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -566,6 +566,13 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
return false;
return true;
}
+ case Instruction::Mul: {
+ const APInt *MulConst;
+ // We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C')
+ return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) &&
+ MulConst->isNegatedPowerOf2() &&
+ MulConst->countTrailingZeros() == NumBits;
+ }
}
}
@@ -680,6 +687,17 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
isLeftShift, IC, DL));
return PN;
}
+ case Instruction::Mul: {
+ assert(!isLeftShift && "Unexpected shift direction!");
+ auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0));
+ IC.InsertNewInstWith(Neg, *I);
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits);
+ auto *And = BinaryOperator::CreateAnd(Neg,
+ ConstantInt::get(I->getType(), Mask));
+ And->takeName(I);
+ return IC.InsertNewInstWith(And, *I);
+ }
}
}
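
The new Mul case implements the fold lshr (mul X, -(1 << C)), C --> and (neg X), Mask, where Mask keeps the low BitWidth - C bits. A standalone 8-bit brute-force check of that equivalence for C = 2 (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C = 2;
      const uint8_t MulConst = static_cast<uint8_t>(-(1 << C)); // 0xFC
      const uint8_t Mask = static_cast<uint8_t>(0xFF >> C);     // low 6 bits set
      for (unsigned x = 0; x <= 255; ++x) {
        uint8_t X = static_cast<uint8_t>(x);
        uint8_t Shifted = static_cast<uint8_t>(X * MulConst) >> C;
        uint8_t Folded = static_cast<uint8_t>(-X) & Mask;
        assert(Shifted == Folded);
      }
      return 0;
    }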
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 75520a0c8d5f..71c763de43b4 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -994,6 +994,24 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
return SelectInst::Create(X, TVal, FVal);
}
+static Constant *constantFoldOperationIntoSelectOperand(
+ Instruction &I, SelectInst *SI, Value *SO) {
+ auto *ConstSO = dyn_cast<Constant>(SO);
+ if (!ConstSO)
+ return nullptr;
+
+ SmallVector<Constant *> ConstOps;
+ for (Value *Op : I.operands()) {
+ if (Op == SI)
+ ConstOps.push_back(ConstSO);
+ else if (auto *C = dyn_cast<Constant>(Op))
+ ConstOps.push_back(C);
+ else
+ llvm_unreachable("Operands should be select or constant");
+ }
+ return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
+}
+
static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner::BuilderTy &Builder) {
if (auto *Cast = dyn_cast<CastInst>(&I))
@@ -1101,8 +1119,17 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
}
}
- Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
- Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
+ // Make sure that one of the select arms constant folds successfully.
+ Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV);
+ Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV);
+ if (!NewTV && !NewFV)
+ return nullptr;
+
+ // Create an instruction for the arm that did not fold.
+ if (!NewTV)
+ NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
+ if (!NewFV)
+ NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}
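
The reworked FoldOpIntoSelect now refuses to act unless at least one select arm constant-folds, so it never trades one instruction for two new ones. A toy model of that gating (plain C++, not the LLVM API; the names here are made up for illustration):

    #include <cassert>
    #include <functional>
    #include <optional>
    #include <string>
    #include <utility>

    struct Val {
      std::optional<int> Konst; // set if this arm is a compile-time constant
      std::string Name;         // otherwise an opaque SSA-like name
    };

    // Returns the folded arms, or nullopt if neither arm is constant,
    // mirroring "if (!NewTV && !NewFV) return nullptr".
    std::optional<std::pair<Val, Val>>
    foldOpIntoSelect(const Val &TV, const Val &FV, std::function<int(int)> Op) {
      std::optional<int> NewT =
          TV.Konst ? std::optional<int>(Op(*TV.Konst)) : std::nullopt;
      std::optional<int> NewF =
          FV.Konst ? std::optional<int>(Op(*FV.Konst)) : std::nullopt;
      if (!NewT && !NewF)
        return std::nullopt;
      Val T = NewT ? Val{NewT, ""} : Val{std::nullopt, "op(" + TV.Name + ")"};
      Val F = NewF ? Val{NewF, ""} : Val{std::nullopt, "op(" + FV.Name + ")"};
      return std::make_pair(T, F);
    }

    int main() {
      auto AddTen = [](int X) { return X + 10; };
      // One constant arm: fold it, materialize the op only for the other arm.
      auto R = foldOpIntoSelect(Val{7, ""}, Val{std::nullopt, "%x"}, AddTen);
      assert(R && *R->first.Konst == 17 && R->second.Name == "op(%x)");
      // No constant arm: refuse, as the rewritten code now does.
      assert(!foldOpIntoSelect(Val{std::nullopt, "%a"}, Val{std::nullopt, "%b"},
                               AddTen));
      return 0;
    }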
@@ -2774,13 +2801,14 @@ static bool isAllocSiteRemovable(Instruction *AI,
continue;
}
- if (isFreeCall(I, &TLI) && getAllocationFamily(I, &TLI) == Family) {
+ if (getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
+ getAllocationFamily(I, &TLI) == Family) {
assert(Family);
Users.emplace_back(I);
continue;
}
- if (isReallocLikeFn(I, &TLI) &&
+ if (getReallocatedOperand(cast<CallBase>(I), &TLI) == PI &&
getAllocationFamily(I, &TLI) == Family) {
assert(Family);
Users.emplace_back(I);
@@ -2805,7 +2833,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
}
Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
- assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI));
+ assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
// If we have a malloc call which is only used in any amount of comparisons to
// null and free calls, delete the calls and replace the comparisons with true
@@ -3007,9 +3035,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
return &FI;
}
-Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
- Value *Op = FI.getArgOperand(0);
-
+Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
// Leave a marker since we can't modify the CFG here.
@@ -3024,12 +3050,10 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
// If we had free(realloc(...)) with no intervening uses, then eliminate the
// realloc() entirely.
- if (CallInst *CI = dyn_cast<CallInst>(Op)) {
- if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) {
- return eraseInstFromFunction(
- *replaceInstUsesWith(*CI, CI->getOperand(0)));
- }
- }
+ CallInst *CI = dyn_cast<CallInst>(Op);
+ if (CI && CI->hasOneUse())
+ if (Value *ReallocatedOp = getReallocatedOperand(CI, &TLI))
+ return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
// If we optimize for code size, try to move the call to free before the null
// test so that simplify cfg can remove the empty block and dead code
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 4fed4bd18fb1..cf2754b1dd60 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -108,6 +108,7 @@ static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kFreeBSDAArch64_ShadowOffset64 = 1ULL << 47;
static const uint64_t kFreeBSDKasan_ShadowOffset64 = 0xdffff7c000000000;
static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
@@ -523,6 +524,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
Mapping.Offset = kPPC64_ShadowOffset64;
else if (IsSystemZ)
Mapping.Offset = kSystemZ_ShadowOffset64;
+ else if (IsFreeBSD && IsAArch64)
+ Mapping.Offset = kFreeBSDAArch64_ShadowOffset64;
else if (IsFreeBSD && !IsMIPS64) {
if (IsKasan)
Mapping.Offset = kFreeBSDKasan_ShadowOffset64;
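
For orientation (not part of the patch): with ASan's usual shadow scale of 3, an application address maps to shadow as (Addr >> 3) + Offset, so the new FreeBSD/AArch64 constant places the shadow region at 1 << 47. A small sketch of that arithmetic with an arbitrary example address:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t kFreeBSDAArch64_ShadowOffset64 = 1ULL << 47;
      const unsigned kShadowScale = 3; // each shadow byte covers 8 app bytes
      uint64_t AppAddr = 0x0000123456789ab0ULL; // arbitrary example address
      uint64_t Shadow = (AppAddr >> kShadowScale) + kFreeBSDAArch64_ShadowOffset64;
      std::printf("app 0x%016llx -> shadow 0x%016llx\n",
                  (unsigned long long)AppAddr, (unsigned long long)Shadow);
      return 0;
    }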
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 57c491436b93..27107f46ed92 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -101,42 +101,6 @@ static bool runCGProfilePass(
return addModuleFlags(M, Counts);
}
-namespace {
-struct CGProfileLegacyPass final : public ModulePass {
- static char ID;
- CGProfileLegacyPass() : ModulePass(ID) {
- initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<LazyBlockFrequencyInfoPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- bool runOnModule(Module &M) override {
- auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
- return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI();
- };
- auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
- return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- };
-
- return runCGProfilePass(M, GetBFI, GetTTI, true);
- }
-};
-
-} // namespace
-
-char CGProfileLegacyPass::ID = 0;
-
-INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
- false)
-
-ModulePass *llvm::createCGProfileLegacyPass() {
- return new CGProfileLegacyPass();
-}
-
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index e5c0705b916e..adc007dacae4 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -103,47 +103,6 @@ static void parseCHRFilterFiles() {
}
namespace {
-class ControlHeightReductionLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- ControlHeightReductionLegacyPass() : FunctionPass(ID) {
- initializeControlHeightReductionLegacyPassPass(
- *PassRegistry::getPassRegistry());
- parseCHRFilterFiles();
- }
-
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<RegionInfoPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-} // end anonymous namespace
-
-char ControlHeightReductionLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
- "chr",
- "Reduce control height in the hot paths",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
-INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
- "chr",
- "Reduce control height in the hot paths",
- false, false)
-
-FunctionPass *llvm::createControlHeightReductionLegacyPass() {
- return new ControlHeightReductionLegacyPass();
-}
-
-namespace {
struct CHRStats {
CHRStats() = default;
@@ -2083,18 +2042,6 @@ bool CHR::run() {
return Changed;
}
-bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
- BlockFrequencyInfo &BFI =
- getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- ProfileSummaryInfo &PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
- std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
- std::make_unique<OptimizationRemarkEmitter>(&F);
- return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run();
-}
-
namespace llvm {
ControlHeightReductionPass::ControlHeightReductionPass() {
diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index 2091881c29fe..d7561c193aa3 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -163,42 +163,11 @@ public:
}
}; // End of InstrOrderFile struct
-
-class InstrOrderFileLegacyPass : public ModulePass {
-public:
- static char ID;
-
- InstrOrderFileLegacyPass() : ModulePass(ID) {
- initializeInstrOrderFileLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-};
-
} // End anonymous namespace
-bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- return InstrOrderFile().run(M);
-}
-
PreservedAnalyses
InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
if (InstrOrderFile().run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
-
-INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
- "Instrumentation for Order File", false, false)
-INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
- "Instrumentation for Order File", false, false)
-
-char InstrOrderFileLegacyPass::ID = 0;
-
-ModulePass *llvm::createInstrOrderFilePass() {
- return new InstrOrderFileLegacyPass();
-}
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 3572cb3b50e2..5b7aa304b987 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -147,35 +147,6 @@ cl::opt<bool> SkipRetExitBlock(
"skip-ret-exit-block", cl::init(true),
cl::desc("Suppress counter promotion if exit blocks contain ret."));
-class InstrProfilingLegacyPass : public ModulePass {
- InstrProfiling InstrProf;
-
-public:
- static char ID;
-
- InstrProfilingLegacyPass() : ModulePass(ID) {}
- InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
- : ModulePass(ID), InstrProf(Options, IsCS) {
- initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override {
- return "Frontend instrumentation-based coverage lowering";
- }
-
- bool runOnModule(Module &M) override {
- auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- return InstrProf.run(M, GetTLI);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-};
-
///
/// A helper class to promote one counter RMW operation in the loop
/// into register update.
@@ -439,21 +410,6 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
}
-char InstrProfilingLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(InstrProfilingLegacyPass, "instrprof",
- "Frontend instrumentation-based coverage lowering.",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(InstrProfilingLegacyPass, "instrprof",
- "Frontend instrumentation-based coverage lowering.", false,
- false)
-
-ModulePass *
-llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
- bool IsCS) {
- return new InstrProfilingLegacyPass(Options, IsCS);
-}
-
bool InstrProfiling::lowerIntrinsics(Function *F) {
bool MadeChange = false;
PromotionCandidates.clear();
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 9ff0e632bd7f..bd575b6cf3b0 100644
--- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -94,11 +94,6 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeMemProfilerLegacyPassPass(Registry);
initializeModuleMemProfilerLegacyPassPass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
- initializeControlHeightReductionLegacyPassPass(Registry);
- initializeCGProfileLegacyPassPass(Registry);
- initializeInstrOrderFileLegacyPassPass(Registry);
- initializeInstrProfilingLegacyPassPass(Registry);
- initializeModuleSanitizerCoverageLegacyPassPass(Registry);
initializeDataFlowSanitizerLegacyPassPass(Registry);
}
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4d72f6c3d1a9..4606bd5de6c3 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -417,6 +417,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
0x01000000000, // OriginBase
};
+// aarch64 FreeBSD
+static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
+ 0x1800000000000, // AndMask
+ 0x0400000000000, // XorMask
+ 0x0200000000000, // ShadowBase
+ 0x0700000000000, // OriginBase
+};
+
// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
0x000180000000, // AndMask
@@ -466,6 +474,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
&Linux_AArch64_MemoryMapParams,
};
+static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
+ nullptr,
+ &FreeBSD_AArch64_MemoryMapParams,
+};
+
static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
&FreeBSD_I386_MemoryMapParams,
&FreeBSD_X86_64_MemoryMapParams,
@@ -894,6 +907,9 @@ void MemorySanitizer::initializeModule(Module &M) {
switch (TargetTriple.getOS()) {
case Triple::FreeBSD:
switch (TargetTriple.getArch()) {
+ case Triple::aarch64:
+ MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
+ break;
case Triple::x86_64:
MapParams = FreeBSD_X86_MemoryMapParams.bits64;
break;
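
A rough sketch of how these parameters would be applied; the shadow = ((addr & ~AndMask) ^ XorMask) + ShadowBase shape is my reading of MSan's usual userspace mapping and is not spelled out in this hunk, so treat it as an assumption:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t AndMask    = 0x1800000000000ULL;
      const uint64_t XorMask    = 0x0400000000000ULL;
      const uint64_t ShadowBase = 0x0200000000000ULL;
      const uint64_t OriginBase = 0x0700000000000ULL;
      uint64_t Addr = 0x00001234567890ULL; // arbitrary example address
      uint64_t Off = (Addr & ~AndMask) ^ XorMask;
      std::printf("shadow 0x%013llx  origin 0x%013llx\n",
                  (unsigned long long)(Off + ShadowBase),
                  (unsigned long long)(Off + OriginBase));
      return 0;
    }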
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3a29cd70e42e..c4512d0222cd 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -210,12 +210,11 @@ cl::opt<bool>
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data for Comdat functions, which often turns out to be false
// positive due to the pre-instrumentation inline.
-static cl::opt<bool>
- NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
- cl::Hidden,
- cl::desc("The option is used to turn on/off "
- "warnings about hash mismatch for comdat "
- "functions."));
+static cl::opt<bool> NoPGOWarnMismatchComdatWeak(
+ "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
+ cl::desc("The option is used to turn on/off "
+ "warnings about hash mismatch for comdat "
+ "or weak functions."));
// Command line option to enable/disable select instruction instrumentation.
static cl::opt<bool>
@@ -287,6 +286,11 @@ static cl::opt<unsigned> PGOVerifyBFICutoff(
cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
"profile count value is below."));
+static cl::opt<std::string> PGOTraceFuncHash(
+ "pgo-trace-func-hash", cl::init("-"), cl::Hidden,
+ cl::value_desc("function name"),
+ cl::desc("Trace the hash of the function with this name."));
+
namespace llvm {
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
@@ -630,6 +634,10 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
<< ", High32 CRC = " << JCH.getCRC());
}
LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
+
+ if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
+ dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
+ << " in building " << F.getParent()->getSourceFileName() << "\n";
}
// Check if we can safely rename this Comdat function.
@@ -832,8 +840,6 @@ static void instrumentOneFunc(
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
if (PGOFunctionEntryCoverage) {
- assert(!IsCS &&
- "entry coverge does not support context-sensitive instrumentation");
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
@@ -1216,8 +1222,9 @@ static void annotateFunctionWithHashMismatch(Function &F,
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
bool &AllMinusOnes) {
auto &Ctx = M->getContext();
- Expected<InstrProfRecord> Result =
- PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+ uint64_t MismatchedFuncSum = 0;
+ Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
+ FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
if (Error E = Result.takeError()) {
handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
auto Err = IPE.get();
@@ -1233,10 +1240,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
SkipWarning =
NoPGOWarnMismatch ||
- (NoPGOWarnMismatchComdat &&
- (F.hasComdat() ||
+ (NoPGOWarnMismatchComdatWeak &&
+ (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
- LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+ LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
+ << " skip=" << SkipWarning << ")");
// Emit function metadata indicating PGO profile mismatch.
annotateFunctionWithHashMismatch(F, M->getContext());
}
@@ -1245,9 +1253,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
if (SkipWarning)
return;
- std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
- std::string(" Hash = ") +
- std::to_string(FuncInfo.FunctionHash);
+ std::string Msg =
+ IPE.message() + std::string(" ") + F.getName().str() +
+ std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
+ std::string(" up to ") + std::to_string(MismatchedFuncSum) +
+ std::string(" count discarded");
Ctx.diagnose(
DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
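
The new option uses "-" as a disabled sentinel and otherwise does a substring match on the function name before printing the hash. A minimal sketch of that filter (not the cl::opt machinery):

    #include <cassert>
    #include <string>

    bool shouldTraceFuncHash(const std::string &TraceOpt,
                             const std::string &FuncName) {
      return TraceOpt != "-" && FuncName.find(TraceOpt) != std::string::npos;
    }

    int main() {
      assert(!shouldTraceFuncHash("-", "foo"));      // default: tracing off
      assert(shouldTraceFuncHash("foo", "_Z3foov")); // substring match
      assert(!shouldTraceFuncHash("bar", "_Z3foov"));
      return 0;
    }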
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index d9d11cc90d3d..3ca476e74953 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -278,53 +278,6 @@ private:
const SpecialCaseList *Allowlist;
const SpecialCaseList *Blocklist;
};
-
-class ModuleSanitizerCoverageLegacyPass : public ModulePass {
-public:
- ModuleSanitizerCoverageLegacyPass(
- const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
- const std::vector<std::string> &AllowlistFiles =
- std::vector<std::string>(),
- const std::vector<std::string> &BlocklistFiles =
- std::vector<std::string>())
- : ModulePass(ID), Options(Options) {
- if (AllowlistFiles.size() > 0)
- Allowlist = SpecialCaseList::createOrDie(AllowlistFiles,
- *vfs::getRealFileSystem());
- if (BlocklistFiles.size() > 0)
- Blocklist = SpecialCaseList::createOrDie(BlocklistFiles,
- *vfs::getRealFileSystem());
- initializeModuleSanitizerCoverageLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override {
- ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
- Blocklist.get());
- auto DTCallback = [this](Function &F) -> const DominatorTree * {
- return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
- };
- auto PDTCallback = [this](Function &F) -> const PostDominatorTree * {
- return &this->getAnalysis<PostDominatorTreeWrapperPass>(F)
- .getPostDomTree();
- };
- return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback);
- }
-
- static char ID; // Pass identification, replacement for typeid
- StringRef getPassName() const override { return "ModuleSanitizerCoverage"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- }
-
-private:
- SanitizerCoverageOptions Options;
-
- std::unique_ptr<SpecialCaseList> Allowlist;
- std::unique_ptr<SpecialCaseList> Blocklist;
-};
-
} // namespace
PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M,
@@ -1075,20 +1028,3 @@ ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const {
return "\1section$end$__DATA$__" + Section;
return "__stop___" + Section;
}
-
-char ModuleSanitizerCoverageLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov",
- "Pass for instrumenting coverage on functions", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
- "Pass for instrumenting coverage on functions", false,
- false)
-ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
- const SanitizerCoverageOptions &Options,
- const std::vector<std::string> &AllowlistFiles,
- const std::vector<std::string> &BlocklistFiles) {
- return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles,
- BlocklistFiles);
-}
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 4c42869dbd58..3f0dad7ee769 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -776,6 +776,11 @@ struct DSEState {
// fall back to CFG scan starting from all non-unreachable roots.
bool AnyUnreachableExit;
+ // Whether or not we should iterate on removing dead stores at the end of the
+ // function due to removing a store causing a previously captured pointer to
+ // no longer be captured.
+ bool ShouldIterateEndOfFunctionDSE;
+
// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
DSEState &operator=(const DSEState &) = delete;
@@ -1103,9 +1108,8 @@ struct DSEState {
return {std::make_pair(MemoryLocation(Ptr, Len), false)};
if (auto *CB = dyn_cast<CallBase>(I)) {
- if (isFreeCall(I, &TLI))
- return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
- true)};
+ if (Value *FreedOp = getFreedOperand(CB, &TLI))
+ return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
}
return None;
@@ -1114,9 +1118,9 @@ struct DSEState {
/// Returns true if \p I is a memory terminator instruction like
/// llvm.lifetime.end or free.
bool isMemTerminatorInst(Instruction *I) const {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
- return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) ||
- isFreeCall(I, &TLI);
+ auto *CB = dyn_cast<CallBase>(I);
+ return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
+ getFreedOperand(CB, &TLI) != nullptr);
}
/// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
@@ -1598,6 +1602,14 @@ struct DSEState {
if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
SkipStores.insert(MD);
+ if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
+ if (SI->getValueOperand()->getType()->isPointerTy()) {
+ const Value *UO = getUnderlyingObject(SI->getValueOperand());
+ if (CapturedBeforeReturn.erase(UO))
+ ShouldIterateEndOfFunctionDSE = true;
+ InvisibleToCallerAfterRet.erase(UO);
+ }
+ }
}
Updater.removeMemoryAccess(MA);
@@ -1671,33 +1683,36 @@ struct DSEState {
LLVM_DEBUG(
dbgs()
<< "Trying to eliminate MemoryDefs at the end of the function\n");
- for (MemoryDef *Def : llvm::reverse(MemDefs)) {
- if (SkipStores.contains(Def))
- continue;
+ do {
+ ShouldIterateEndOfFunctionDSE = false;
+ for (MemoryDef *Def : llvm::reverse(MemDefs)) {
+ if (SkipStores.contains(Def))
+ continue;
- Instruction *DefI = Def->getMemoryInst();
- auto DefLoc = getLocForWrite(DefI);
- if (!DefLoc || !isRemovable(DefI))
- continue;
+ Instruction *DefI = Def->getMemoryInst();
+ auto DefLoc = getLocForWrite(DefI);
+ if (!DefLoc || !isRemovable(DefI))
+ continue;
- // NOTE: Currently eliminating writes at the end of a function is limited
- // to MemoryDefs with a single underlying object, to save compile-time. In
- // practice it appears the case with multiple underlying objects is very
- // uncommon. If it turns out to be important, we can use
- // getUnderlyingObjects here instead.
- const Value *UO = getUnderlyingObject(DefLoc->Ptr);
- if (!isInvisibleToCallerAfterRet(UO))
- continue;
+ // NOTE: Currently eliminating writes at the end of a function is
+ // limited to MemoryDefs with a single underlying object, to save
+ // compile-time. In practice it appears the case with multiple
+ // underlying objects is very uncommon. If it turns out to be important,
+ // we can use getUnderlyingObjects here instead.
+ const Value *UO = getUnderlyingObject(DefLoc->Ptr);
+ if (!isInvisibleToCallerAfterRet(UO))
+ continue;
- if (isWriteAtEndOfFunction(Def)) {
- // See through pointer-to-pointer bitcasts
- LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
- "of the function\n");
- deleteDeadInstruction(DefI);
- ++NumFastStores;
- MadeChange = true;
+ if (isWriteAtEndOfFunction(Def)) {
+ // See through pointer-to-pointer bitcasts
+ LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
+ "of the function\n");
+ deleteDeadInstruction(DefI);
+ ++NumFastStores;
+ MadeChange = true;
+ }
}
- }
+ } while (ShouldIterateEndOfFunctionDSE);
return MadeChange;
}
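
The scan is now wrapped in a do/while so that deleting a store which un-captures a pointer triggers another pass over the remaining MemoryDefs. A generic sketch of that fixed-point shape (illustrative only, no DSE semantics):

    #include <cassert>
    #include <set>
    #include <vector>

    int main() {
      // "Stores" 0..4; store I is removable once no captured pointer blocks it.
      std::set<int> Captured = {2};                   // pretend pointer 2 is captured
      std::vector<int> Blockers = {2, -1, -1, 2, -1}; // blocking pointer per store
      std::vector<bool> Removed(5, false);
      bool ShouldIterate;
      do {
        ShouldIterate = false;
        for (int I = 4; I >= 0; --I) {
          if (Removed[I])
            continue;
          if (Blockers[I] != -1 && Captured.count(Blockers[I]))
            continue;              // still captured, keep the store
          Removed[I] = true;
          if (Captured.erase(I))   // removing store I un-captures pointer I
            ShouldIterate = true;  // so scan again from the top
        }
      } while (ShouldIterate);
      // Store 3 is skipped at first because pointer 2 looks captured; removing
      // store 2 clears that, and the second pass deletes store 3 as well.
      for (bool R : Removed)
        assert(R);
      return 0;
    }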
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index a9ca0bdc8f7b..9698ed97379e 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1738,7 +1738,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// through *explicit* control flow. We have to eliminate the possibility of
// implicit exits (see below) before we know it's truly exact.
const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
+ if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
return false;
assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
@@ -1769,7 +1769,8 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
return true;
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
+ if (isa<SCEVCouldNotCompute>(ExitCount) ||
+ !Rewriter.isSafeToExpand(ExitCount))
return true;
assert(SE->isLoopInvariant(ExitCount, L) &&
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index b54cf5e7cb20..328615011ceb 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1451,7 +1451,7 @@ bool LoopConstrainer::run() {
return false;
}
- if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) {
+ if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) {
LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
<< " preloop exit limit " << *ExitPreLoopAtSCEV
<< " at block " << InsertPt->getParent()->getName()
@@ -1478,7 +1478,7 @@ bool LoopConstrainer::run() {
return false;
}
- if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) {
+ if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) {
LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the"
<< " main loop exit limit " << *ExitMainLoopAtSCEV
<< " at block " << InsertPt->getParent()->getName()
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 9590fbbb1994..fd2eaee8b47d 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -388,15 +388,15 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
continue;
+ BasicBlock *BB = P.InsertPt->getParent();
+ SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
P.LSCEVAddRec->getStepRecurrence(*SE)));
- if (!isSafeToExpand(NextLSCEV, *SE))
+ if (!SCEVE.isSafeToExpand(NextLSCEV))
continue;
- BasicBlock *BB = P.InsertPt->getParent();
Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/);
- SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
IRBuilder<> Builder(P.InsertPt);
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d908c151d9f2..3ed022f65d9a 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1129,7 +1129,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
- if (!isSafeToExpand(Start, *SE))
+ if (!Expander.isSafeToExpand(Start))
return Changed;
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
@@ -1163,7 +1163,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
- if (!isSafeToExpand(NumBytesS, *SE))
+ if (!Expander.isSafeToExpand(NumBytesS))
return Changed;
Value *NumBytes =
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 1d3023d04463..18daa4295224 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -288,7 +288,6 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
Vec = &CurrentLoop->getSubLoops();
}
LoopList.push_back(CurrentLoop);
- return;
}
namespace {
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index d0ee5b47a8ca..b327d38d2a84 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -275,7 +275,8 @@ class LoopPredication {
/// which is that an expression *can be made* invariant via SCEVExpander.
 /// Thus, this version is only suitable for finding an insert point to be
/// passed to SCEVExpander!
- Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops);
+ Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User,
+ ArrayRef<const SCEV *> Ops);
/// Return true if the value is known to produce a single fixed value across
/// all iterations on which it executes. Note that this does not imply
@@ -418,13 +419,14 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
return Builder.getFalse();
}
- Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS}));
- Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS}));
+ Value *LHSV =
+ Expander.expandCodeFor(LHS, Ty, findInsertPt(Expander, Guard, {LHS}));
+ Value *RHSV =
+ Expander.expandCodeFor(RHS, Ty, findInsertPt(Expander, Guard, {RHS}));
IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
return Builder.CreateICmp(Pred, LHSV, RHSV);
}
-
// Returns true if its safe to truncate the IV to RangeCheckType.
// When the IV type is wider than the range operand type, we can still do loop
// predication, by generating SCEVs for the range and latch that are of the
@@ -516,14 +518,15 @@ Instruction *LoopPredication::findInsertPt(Instruction *Use,
return Preheader->getTerminator();
}
-Instruction *LoopPredication::findInsertPt(Instruction *Use,
- ArrayRef<const SCEV*> Ops) {
+Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander,
+ Instruction *Use,
+ ArrayRef<const SCEV *> Ops) {
// Subtlety: SCEV considers things to be invariant if the value produced is
// the same across iterations. This is not the same as being able to
// evaluate outside the loop, which is what we actually need here.
for (const SCEV *Op : Ops)
if (!SE->isLoopInvariant(Op, L) ||
- !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
+ !Expander.isSafeToExpandAt(Op, Preheader->getTerminator()))
return Use;
return Preheader->getTerminator();
}
@@ -589,8 +592,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
return None;
}
- if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
- !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
+ if (!Expander.isSafeToExpandAt(LatchStart, Guard) ||
+ !Expander.isSafeToExpandAt(LatchLimit, Guard)) {
LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
return None;
}
@@ -632,8 +635,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
return None;
}
- if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
- !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
+ if (!Expander.isSafeToExpandAt(LatchStart, Guard) ||
+ !Expander.isSafeToExpandAt(LatchLimit, Guard)) {
LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
return None;
}
@@ -1159,7 +1162,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
!SE->isLoopInvariant(MinEC, L) ||
- !isSafeToExpandAt(MinEC, WidenableBR, *SE))
+ !Rewriter.isSafeToExpandAt(MinEC, WidenableBR))
return ChangedLoop;
// Subtlety: We need to avoid inserting additional uses of the WC. We know
@@ -1198,7 +1201,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
if (isa<SCEVCouldNotCompute>(ExitCount) ||
ExitCount->getType()->isPointerTy() ||
- !isSafeToExpandAt(ExitCount, WidenableBR, *SE))
+ !Rewriter.isSafeToExpandAt(ExitCount, WidenableBR))
continue;
const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b7e0e32780b4..083f87436acd 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -576,6 +576,18 @@ public:
return false;
}
+ // TODO: Tokens may breach LCSSA form by default. However, the transform for
+ // dead exit blocks requires LCSSA form to be maintained for all values,
+ // tokens included, otherwise it may break use-def dominance (see PR56243).
+ if (!DeadExitBlocks.empty() && !L.isLCSSAForm(DT, /*IgnoreTokens*/ false)) {
+ assert(L.isLCSSAForm(DT, /*IgnoreTokens*/ true) &&
+ "LCSSA broken not by tokens?");
+ LLVM_DEBUG(dbgs() << "Give up constant terminator folding in loop "
+ << Header->getName()
+ << ": token uses potentially break LCSSA form.\n");
+ return false;
+ }
+
SE.forgetTopmostLoop(&L);
// Dump analysis results.
LLVM_DEBUG(dump());
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ef7809c6681..a3434f8bc46d 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1950,6 +1950,7 @@ class LSRInstance {
Loop *const L;
MemorySSAUpdater *MSSAU;
TTI::AddressingModeKind AMK;
+ mutable SCEVExpander Rewriter;
bool Changed = false;
/// This is the insert position that the current loop's induction variable
@@ -1998,7 +1999,7 @@ class LSRInstance {
SmallVectorImpl<ChainUsers> &ChainUsersVec);
void FinalizeChain(IVChain &Chain);
void CollectChains();
- void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ void GenerateIVChain(const IVChain &Chain,
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
void CollectInterestingTypesAndFactors();
@@ -2068,22 +2069,19 @@ class LSRInstance {
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
BasicBlock::iterator
- HoistInsertPosition(BasicBlock::iterator IP,
- const SmallVectorImpl<Instruction *> &Inputs) const;
- BasicBlock::iterator
- AdjustInsertPositionForExpand(BasicBlock::iterator IP,
- const LSRFixup &LF,
- const LSRUse &LU,
- SCEVExpander &Rewriter) const;
+ HoistInsertPosition(BasicBlock::iterator IP,
+ const SmallVectorImpl<Instruction *> &Inputs) const;
+ BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU) const;
Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
- BasicBlock::iterator IP, SCEVExpander &Rewriter,
+ BasicBlock::iterator IP,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
- const Formula &F, SCEVExpander &Rewriter,
+ const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
- SCEVExpander &Rewriter,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
@@ -3183,7 +3181,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
/// Generate an add or subtract for each IVInc in a chain to materialize the IV
/// user's operand from the previous IV user's operand.
-void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+void LSRInstance::GenerateIVChain(const IVChain &Chain,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
// by LSR.
@@ -3335,7 +3333,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE) &&
+ if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
(!NV->getType()->isPointerTy() ||
SE.getPointerBase(N) == SE.getPointerBase(S))) {
// S is normalized, so normalize N before folding it into S
@@ -3343,6 +3341,21 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
+ } else if (L->isLoopInvariant(NV) &&
+ (!isa<Instruction>(NV) ||
+ DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
+ !NV->getType()->isPointerTy()) {
+ // If we can't generally expand the expression (e.g. it contains
+ // a divide), but it is already at a loop invariant point before the
+ // loop, wrap it in an unknown (to prevent the expander from trying
+ // to re-expand in a potentially unsafe way.) The restriction to
+ // integer types is required because the unknown hides the base, and
+ // SCEV can't compute the difference of two unknown pointers.
+ N = SE.getUnknown(NV);
+ N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
+ Kind = LSRUse::ICmpZero;
+ S = SE.getMinusSCEV(N, S);
+ assert(!isa<SCEVCouldNotCompute>(S));
}
// -1 and the negations of all interesting strides (except the negation
@@ -3385,10 +3398,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
/// Insert a formula for the given expression into the given use, separating out
/// loop-variant portions from loop-invariant and loop-computable portions.
-void
-LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
+ size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
- if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false))
+ if (!Rewriter.isSafeToExpand(S))
LU.RigidFormula = true;
Formula F;
@@ -5206,11 +5219,8 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.
-BasicBlock::iterator
-LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
- const LSRFixup &LF,
- const LSRUse &LU,
- SCEVExpander &Rewriter) const {
+BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
+ BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
@@ -5273,14 +5283,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
/// is called "expanding").
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
const Formula &F, BasicBlock::iterator IP,
- SCEVExpander &Rewriter,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
if (LU.RigidFormula)
return LF.OperandValToReplace;
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
- IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
+ IP = AdjustInsertPositionForExpand(IP, LF, LU);
Rewriter.setInsertPoint(&*IP);
// Inform the Rewriter if we have a post-increment use, so that it can
@@ -5452,7 +5461,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
- SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
@@ -5507,8 +5516,8 @@ void LSRInstance::RewriteForPHI(
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
- Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(),
- Rewriter, DeadInsts);
+ Value *FullV =
+ Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -5567,15 +5576,14 @@ void LSRInstance::RewriteForPHI(
/// is called "expanding"), and update the UserInst to reference the newly
/// expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
- const Formula &F, SCEVExpander &Rewriter,
+ const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
- RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
+ RewriteForPHI(PN, LU, LF, F, DeadInsts);
} else {
- Value *FullV =
- Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
+ Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -5609,13 +5617,6 @@ void LSRInstance::ImplementSolution(
// we can remove them after we are done working.
SmallVector<WeakTrackingVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr",
- false);
-#ifndef NDEBUG
- Rewriter.setDebugType(DEBUG_TYPE);
-#endif
- Rewriter.disableCanonicalMode();
- Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Mark phi nodes that terminate chains so the expander tries to reuse them.
@@ -5627,12 +5628,12 @@ void LSRInstance::ImplementSolution(
// Expand the new value definitions and update the users.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
- Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
+ Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
Changed = true;
}
for (const IVChain &Chain : IVChainVec) {
- GenerateIVChain(Chain, Rewriter, DeadInsts);
+ GenerateIVChain(Chain, DeadInsts);
Changed = true;
}
@@ -5697,8 +5698,10 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
- MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 ?
- PreferredAddresingMode : TTI.getPreferredAddressingMode(L, &SE)) {
+ MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
+ ? PreferredAddresingMode
+ : TTI.getPreferredAddressingMode(L, &SE)),
+ Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -5733,6 +5736,14 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
dbgs() << ":\n");
+ // Configure SCEVExpander already now, so the correct mode is used for
+ // isSafeToExpand() checks.
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
+ Rewriter.disableCanonicalMode();
+ Rewriter.enableLSRMode();
+
// First, perform some low-level loop optimizations.
OptimizeShadowIV();
OptimizeLoopTermCond();
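
The LoopStrengthReduce hunks above move the SCEVExpander into LSRInstance as the Rewriter member and configure it (debug type, canonical mode off, LSR mode on) in the constructor, so the isSafeToExpand() checks made while collecting fixups see the same expander configuration as the final rewrite. A minimal sketch of that pattern in plain C++ (illustrative names only, not LLVM code):

    // Sketch: a helper owned and configured by its user up front, so that
    // feasibility checks and the final rewrite consult the same settings.
    #include <iostream>
    #include <vector>

    struct Expander {
      bool LSRMode = false;                 // stands in for enableLSRMode()
      bool isSafeToExpand(int Expr) const {
        // In LSR mode, pretend more expressions become expandable.
        return LSRMode ? Expr >= 0 : Expr > 0;
      }
      int expand(int Expr) const { return Expr * 2; }
    };

    class Planner {
      Expander Rewriter;                    // member, like LSRInstance::Rewriter
    public:
      Planner() { Rewriter.LSRMode = true; }        // configure before any query
      void run(const std::vector<int> &Exprs) {
        for (int E : Exprs)
          if (Rewriter.isSafeToExpand(E))           // same object, same mode...
            std::cout << Rewriter.expand(E) << '\n'; // ...as the expansion
      }
    };

    int main() {
      Planner P;
      P.run({0, 3, -1});
    }

The patch applies the same idea to the real SCEVExpander: construct and configure it once, then let both the legality checks and Expand() consult that single object instead of threading a separately constructed expander through every call.
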
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 75f0896d4845..240fb5e60687 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -142,12 +142,21 @@ XorOpnd::XorOpnd(Value *V) {
isOr = true;
}
+/// Return true if I is an instruction with the FastMathFlags that are needed
+/// for general reassociation set. This is not the same as testing
+/// Instruction::isAssociative() because it includes operations like fsub.
+/// (This routine is only intended to be called for floating-point operations.)
+static bool hasFPAssociativeFlags(Instruction *I) {
+ assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops");
+ return I->hasAllowReassoc() && I->hasNoSignedZeros();
+}
+
/// Return true if V is an instruction of the specified opcode and if it
/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
auto *I = dyn_cast<Instruction>(V);
if (I && I->hasOneUse() && I->getOpcode() == Opcode)
- if (!isa<FPMathOperator>(I) || I->isFast())
+ if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
return cast<BinaryOperator>(I);
return nullptr;
}
@@ -157,7 +166,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
auto *I = dyn_cast<Instruction>(V);
if (I && I->hasOneUse() &&
(I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2))
- if (!isa<FPMathOperator>(I) || I->isFast())
+ if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
return cast<BinaryOperator>(I);
return nullptr;
}
@@ -449,7 +458,8 @@ using RepeatedValue = std::pair<Value*, APInt>;
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
static bool LinearizeExprTree(Instruction *I,
- SmallVectorImpl<RepeatedValue> &Ops) {
+ SmallVectorImpl<RepeatedValue> &Ops,
+ ReassociatePass::OrderedSet &ToRedo) {
assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
"Expected a UnaryOperator or BinaryOperator!");
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
@@ -572,23 +582,32 @@ static bool LinearizeExprTree(Instruction *I,
assert((!isa<Instruction>(Op) ||
cast<Instruction>(Op)->getOpcode() != Opcode
|| (isa<FPMathOperator>(Op) &&
- !cast<Instruction>(Op)->isFast())) &&
+ !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
"Should have been handled above!");
assert(Op->hasOneUse() && "Has uses outside the expression tree!");
// If this is a multiply expression, turn any internal negations into
- // multiplies by -1 so they can be reassociated.
- if (Instruction *Tmp = dyn_cast<Instruction>(Op))
- if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
- (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
- LLVM_DEBUG(dbgs()
- << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
- Tmp = LowerNegateToMultiply(Tmp);
- LLVM_DEBUG(dbgs() << *Tmp << '\n');
- Worklist.push_back(std::make_pair(Tmp, Weight));
- Changed = true;
- continue;
+ // multiplies by -1 so they can be reassociated. Add any users of the
+ // newly created multiplication by -1 to the redo list, so any
+ // reassociation opportunities that are exposed will be reassociated
+ // further.
+ Instruction *Neg;
+ if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
+ (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
+ match(Op, m_Instruction(Neg))) {
+ LLVM_DEBUG(dbgs()
+ << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
+ Instruction *Mul = LowerNegateToMultiply(Neg);
+ LLVM_DEBUG(dbgs() << *Mul << '\n');
+ Worklist.push_back(std::make_pair(Mul, Weight));
+ for (User *U : Mul->users()) {
+ if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
+ ToRedo.insert(UserBO);
}
+ ToRedo.insert(Neg);
+ Changed = true;
+ continue;
+ }
// Failed to morph into an expression of the right type. This really is
// a leaf.
@@ -1141,7 +1160,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
return nullptr;
SmallVector<RepeatedValue, 8> Tree;
- MadeChange |= LinearizeExprTree(BO, Tree);
+ MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
SmallVector<ValueEntry, 8> Factors;
Factors.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
@@ -2206,8 +2225,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
if (Instruction *Res = canonicalizeNegFPConstants(I))
I = Res;
- // Don't optimize floating-point instructions unless they are 'fast'.
- if (I->getType()->isFPOrFPVectorTy() && !I->isFast())
+ // Don't optimize floating-point instructions unless they have the
+ // appropriate FastMathFlags for reassociation enabled.
+ if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I))
return;
// Do not reassociate boolean (i1) expressions. We want to preserve the
@@ -2320,7 +2340,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
- MadeChange |= LinearizeExprTree(I, Tree);
+ MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
for (const RepeatedValue &E : Tree)
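
The Reassociate hunks above replace the blanket isFast() requirement with hasFPAssociativeFlags(), which only asks for the reassoc and nsz flags that reassociation actually relies on. A small plain-C++ sketch of that kind of flag gating (the flag names mirror LLVM's fast-math flags, but the code is illustrative, not the FastMathFlags API):

    // Sketch: require only the specific flags a transform needs, instead of
    // demanding that every fast-math flag is set.
    #include <cstdio>

    enum FMF : unsigned {
      Reassoc = 1 << 0, NoNaNs = 1 << 1, NoInfs = 1 << 2,
      NoSignedZeros = 1 << 3, AllowRecip = 1 << 4, AllowContract = 1 << 5,
      ApproxFunc = 1 << 6,
      Fast = Reassoc | NoNaNs | NoInfs | NoSignedZeros | AllowRecip |
             AllowContract | ApproxFunc
    };

    // Old gate: everything must be set.
    bool isFast(unsigned Flags) { return (Flags & Fast) == Fast; }

    // New gate: only what reassociation actually relies on.
    bool hasFPAssociativeFlags(unsigned Flags) {
      return (Flags & Reassoc) && (Flags & NoSignedZeros);
    }

    int main() {
      unsigned Flags = Reassoc | NoSignedZeros;   // e.g. "reassoc nsz" on an fadd
      std::printf("isFast: %d, associative: %d\n", isFast(Flags),
                  hasFPAssociativeFlags(Flags));  // prints: isFast: 0, associative: 1
    }

The effect is that an fadd/fmul carrying only reassoc and nsz now participates in reassociation, where previously all fast-math flags were required.
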
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 51e4a5773f3e..baf407c5037b 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1702,10 +1702,20 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
auto &Context = Call->getContext();
auto &DL = Call->getModule()->getDataLayout();
auto GetBaseAndOffset = [&](Value *Derived) {
- assert(PointerToBase.count(Derived));
+ Value *Base = nullptr;
+ // Optimizations in unreachable code might substitute the real pointer
+ // with undef, poison, or a null-derived constant. Return a null base for
+ // them to be consistent with the handling in the main algorithm in
+ // findBaseDefiningValue.
+ if (isa<Constant>(Derived))
+ Base =
+ ConstantPointerNull::get(cast<PointerType>(Derived->getType()));
+ else {
+ assert(PointerToBase.count(Derived));
+ Base = PointerToBase.find(Derived)->second;
+ }
unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
- Value *Base = PointerToBase.find(Derived)->second;
Value *Base_int = Builder.CreatePtrToInt(
Base, Type::getIntNTy(Context, IntPtrSize));
Value *Derived_int = Builder.CreatePtrToInt(
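
The RewriteStatepointsForGC hunk above stops asserting that every derived pointer has an entry in PointerToBase and instead hands constant (undef/poison/null-derived) pointers a null base. A plain-C++ sketch of that lookup-with-fallback shape (illustrative types and names only, not the pass's data structures):

    // Sketch: degenerate inputs get a fixed sentinel base instead of
    // requiring an entry in the base map.
    #include <cassert>
    #include <iostream>
    #include <map>
    #include <string>

    std::string getBase(const std::map<std::string, std::string> &PointerToBase,
                        const std::string &Derived, bool DerivedIsConstant) {
      if (DerivedIsConstant)    // e.g. undef/poison left over in unreachable code
        return "null";          // consistent sentinel base
      auto It = PointerToBase.find(Derived);
      assert(It != PointerToBase.end() && "every other pointer must have a base");
      return It->second;
    }

    int main() {
      std::map<std::string, std::string> PointerToBase = {{"gep.p", "p"}};
      std::cout << getBase(PointerToBase, "gep.p", false) << '\n';  // p
      std::cout << getBase(PointerToBase, "undef", true) << '\n';   // null
    }
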
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 008ddfc72740..5ab9e25577d8 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -111,8 +111,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopLoadEliminationPass(Registry);
initializeLoopSimplifyCFGLegacyPassPass(Registry);
initializeLoopVersioningLegacyPassPass(Registry);
- initializeEntryExitInstrumenterPass(Registry);
- initializePostInlineEntryExitInstrumenterPass(Registry);
}
void LLVMAddLoopSimplifyCFGPass(LLVMPassManagerRef PM) {
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index f6525ad7de9b..0b797abefe20 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -68,11 +68,6 @@ static cl::opt<bool>
cl::desc("Allow relaxed uniform region checks"),
cl::init(true));
-static cl::opt<unsigned>
- ReorderNodeSize("structurizecfg-node-reorder-size",
- cl::desc("Limit region size for reordering nodes"),
- cl::init(100), cl::Hidden);
-
// Definition of the complex types used in this pass.
using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -267,8 +262,6 @@ class StructurizeCFG {
void orderNodes();
- void reorderNodes();
-
void analyzeLoops(RegionNode *N);
Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
@@ -427,57 +420,6 @@ void StructurizeCFG::orderNodes() {
}
}
-/// Change the node ordering to decrease the range of live values, especially
-/// the values that capture the control flow path for branches. We do this
-/// by moving blocks with a single predecessor and successor to appear after
-/// predecessor. The motivation is to move some loop exit blocks into a loop.
-/// In cases where a loop has a large number of exit blocks, this reduces the
-/// amount of values needed across the loop boundary.
-void StructurizeCFG::reorderNodes() {
- SmallVector<RegionNode *, 8> NewOrder;
- DenseMap<BasicBlock *, unsigned> MoveTo;
- BitVector Moved(Order.size());
-
- // The benefits of reordering nodes occurs for large regions.
- if (Order.size() <= ReorderNodeSize)
- return;
-
- // The algorithm works with two passes over Order. The first pass identifies
- // the blocks to move and the position to move them to. The second pass
- // creates the new order based upon this information. We move blocks with
- // a single predecessor and successor. If there are multiple candidates then
- // maintain the original order.
- BBSet Seen;
- for (int I = Order.size() - 1; I >= 0; --I) {
- auto *BB = Order[I]->getEntry();
- Seen.insert(BB);
- auto *Pred = BB->getSinglePredecessor();
- auto *Succ = BB->getSingleSuccessor();
- // Consider only those basic blocks that have a predecessor in Order and a
- // successor that exits the region. The region may contain subregions that
- // have been structurized and are not included in Order.
- if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() &&
- !MoveTo.count(Pred)) {
- MoveTo[Pred] = I;
- Moved.set(I);
- }
- }
-
- // If no blocks have been moved then the original order is good.
- if (!Moved.count())
- return;
-
- for (size_t I = 0, E = Order.size(); I < E; ++I) {
- auto *BB = Order[I]->getEntry();
- if (MoveTo.count(BB))
- NewOrder.push_back(Order[MoveTo[BB]]);
- if (!Moved[I])
- NewOrder.push_back(Order[I]);
- }
-
- Order.assign(NewOrder);
-}
-
/// Determine the end of the loops
void StructurizeCFG::analyzeLoops(RegionNode *N) {
if (N->isSubRegion()) {
@@ -1139,7 +1081,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
ParentRegion = R;
orderNodes();
- reorderNodes();
collectInfos();
createFlow();
insertConditions(false);
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 079b2fc973b9..e3cb5f359e34 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -80,7 +80,7 @@ void llvm::detachDeadBlocks(
// contained within it must dominate their uses, that all uses will
// eventually be removed (they are themselves dead).
if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.replaceAllUsesWith(PoisonValue::get(I.getType()));
BB->getInstList().pop_back();
}
new UnreachableInst(BB->getContext(), BB);
@@ -188,8 +188,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// Don't break self-loops.
if (PredBB == BB) return false;
- // Don't break unwinding instructions.
- if (PredBB->getTerminator()->isExceptionalTerminator())
+
+ // Don't break unwinding instructions or terminators with other side-effects.
+ Instruction *PTI = PredBB->getTerminator();
+ if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects())
return false;
// Can't merge if there are multiple distinct successors.
@@ -202,7 +204,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
BasicBlock *NewSucc = nullptr;
unsigned FallThruPath;
if (PredecessorWithTwoSuccessors) {
- if (!(PredBB_BI = dyn_cast<BranchInst>(PredBB->getTerminator())))
+ if (!(PredBB_BI = dyn_cast<BranchInst>(PTI)))
return false;
BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BB_JmpI || !BB_JmpI->isUnconditional())
@@ -256,7 +258,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
- Instruction *PTI = PredBB->getTerminator();
Instruction *STI = BB->getTerminator();
Instruction *Start = &*BB->begin();
// If there's nothing to move, mark the starting instruction as the last
@@ -1141,7 +1142,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
if (Preds.empty()) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
- cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
+ cast<PHINode>(I)->addIncoming(PoisonValue::get(I->getType()), NewBB);
}
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index c4a58f36c171..e25ec74a0572 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -270,9 +270,6 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
bool Changed = false;
- if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI))
- Changed |= setDoesNotFreeMemory(F);
-
if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
Changed |= setNonLazyBind(F);
@@ -285,14 +282,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_strchr:
case LibFunc_strrchr:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
- return Changed;
+ break;
case LibFunc_strtol:
case LibFunc_strtod:
case LibFunc_strtof:
@@ -304,7 +301,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_strcat:
case LibFunc_strncat:
Changed |= setOnlyAccessesArgMemory(F);
@@ -315,7 +312,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotAlias(F, 1);
- return Changed;
+ break;
case LibFunc_strcpy:
case LibFunc_strncpy:
Changed |= setReturnedArg(F, 0);
@@ -330,14 +327,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotAlias(F, 1);
- return Changed;
+ break;
case LibFunc_strxfrm:
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_strcmp: // 0,1
case LibFunc_strspn: // 0,1
case LibFunc_strncmp: // 0,1
@@ -348,7 +345,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_strcoll:
case LibFunc_strcasecmp: // 0,1
case LibFunc_strncasecmp: //
@@ -359,7 +356,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_strstr:
case LibFunc_strpbrk:
Changed |= setOnlyAccessesArgMemory(F);
@@ -367,26 +364,26 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_strtok:
case LibFunc_strtok_r:
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_scanf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_setbuf:
case LibFunc_setvbuf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_strndup:
Changed |= setArgNoUndef(F, 1);
LLVM_FALLTHROUGH;
@@ -398,7 +395,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_stat:
case LibFunc_statvfs:
Changed |= setRetAndArgsNoUndef(F);
@@ -406,7 +403,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_sscanf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -414,7 +411,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_sprintf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -423,7 +420,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_snprintf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -432,7 +429,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
+ break;
case LibFunc_setitimer:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -440,13 +437,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_system:
// May throw; "system" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_aligned_alloc:
Changed |= setAlignedAllocParam(F, 0);
Changed |= setAllocSize(F, 1, None);
@@ -464,7 +461,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
- return Changed;
+ break;
case LibFunc_memcmp:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
@@ -472,21 +469,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_memchr:
case LibFunc_memrchr:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setWillReturn(F);
- return Changed;
+ break;
case LibFunc_modf:
case LibFunc_modff:
case LibFunc_modfl:
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_memcpy:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
@@ -497,7 +494,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_memmove:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
@@ -506,7 +503,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_mempcpy:
case LibFunc_memccpy:
Changed |= setWillReturn(F);
@@ -519,7 +516,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_memalign:
Changed |= setAllocFamily(F, "malloc");
Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned |
@@ -531,19 +528,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
- return Changed;
+ break;
case LibFunc_mkdir:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_mktime:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_realloc:
case LibFunc_reallocf:
case LibFunc_vec_realloc:
@@ -559,17 +556,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setArgNoUndef(F, 1);
- return Changed;
+ break;
case LibFunc_read:
// May throw; "read" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_rewind:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_rmdir:
case LibFunc_remove:
case LibFunc_realpath:
@@ -577,7 +574,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_rename:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -585,20 +582,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_readlink:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_write:
// May throw; "write" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_bcopy:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
@@ -607,7 +604,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyWritesMemory(F, 1);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_bcmp:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
@@ -615,14 +612,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_bzero:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyWritesMemory(F, 0);
- return Changed;
+ break;
case LibFunc_calloc:
case LibFunc_vec_calloc:
Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? "vec_malloc"
@@ -634,21 +631,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setWillReturn(F);
- return Changed;
+ break;
case LibFunc_chmod:
case LibFunc_chown:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_ctermid:
case LibFunc_clearerr:
case LibFunc_closedir:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_atoi:
case LibFunc_atol:
case LibFunc_atof:
@@ -657,13 +654,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyReadsMemory(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_access:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_fopen:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -672,19 +669,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_fdopen:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_feof:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_free:
case LibFunc_vec_free:
Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? "vec_malloc"
@@ -696,7 +693,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_fseek:
case LibFunc_ftell:
case LibFunc_fgetc:
@@ -713,45 +710,45 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_ferror:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F);
- return Changed;
+ break;
case LibFunc_fputc:
case LibFunc_fputc_unlocked:
case LibFunc_fstat:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_frexp:
case LibFunc_frexpf:
case LibFunc_frexpl:
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_fstatvfs:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_fgets:
case LibFunc_fgets_unlocked:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
- return Changed;
+ break;
case LibFunc_fread:
case LibFunc_fread_unlocked:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
- return Changed;
+ break;
case LibFunc_fwrite:
case LibFunc_fwrite_unlocked:
Changed |= setRetAndArgsNoUndef(F);
@@ -759,7 +756,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 3);
// FIXME: readonly #1?
- return Changed;
+ break;
case LibFunc_fputs:
case LibFunc_fputs_unlocked:
Changed |= setRetAndArgsNoUndef(F);
@@ -767,7 +764,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_fscanf:
case LibFunc_fprintf:
Changed |= setRetAndArgsNoUndef(F);
@@ -775,73 +772,73 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_fgetpos:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_getc:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_getlogin_r:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_getc_unlocked:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_getenv:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_gets:
case LibFunc_getchar:
case LibFunc_getchar_unlocked:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
- return Changed;
+ break;
case LibFunc_getitimer:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_getpwnam:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_ungetc:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_uname:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_unlink:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_unsetenv:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_utime:
case LibFunc_utimes:
Changed |= setRetAndArgsNoUndef(F);
@@ -850,13 +847,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_putc:
case LibFunc_putc_unlocked:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_puts:
case LibFunc_printf:
case LibFunc_perror:
@@ -864,23 +861,23 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_pread:
// May throw; "pread" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_pwrite:
// May throw; "pwrite" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_putchar:
case LibFunc_putchar_unlocked:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
- return Changed;
+ break;
case LibFunc_popen:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -889,18 +886,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_pclose:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_vscanf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_vsscanf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -908,20 +905,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_vfscanf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_vprintf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_vfprintf:
case LibFunc_vsprintf:
Changed |= setRetAndArgsNoUndef(F);
@@ -929,63 +926,63 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_vsnprintf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
- return Changed;
+ break;
case LibFunc_open:
// May throw; "open" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_opendir:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_tmpfile:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
- return Changed;
+ break;
case LibFunc_times:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_htonl:
case LibFunc_htons:
case LibFunc_ntohl:
case LibFunc_ntohs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAccessMemory(F);
- return Changed;
+ break;
case LibFunc_lstat:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_lchown:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_qsort:
// May throw; places call through function pointer.
// Cannot give undef pointer/size
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 3);
- return Changed;
+ break;
case LibFunc_dunder_strndup:
Changed |= setArgNoUndef(F, 1);
LLVM_FALLTHROUGH;
@@ -995,28 +992,28 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setWillReturn(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_dunder_strtok_r:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_under_IO_getc:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_under_IO_putc:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_dunder_isoc99_scanf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_stat64:
case LibFunc_lstat64:
case LibFunc_statvfs64:
@@ -1025,7 +1022,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_dunder_isoc99_sscanf:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -1033,7 +1030,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_fopen64:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
@@ -1042,30 +1039,30 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 0);
Changed |= setOnlyReadsMemory(F, 1);
- return Changed;
+ break;
case LibFunc_fseeko64:
case LibFunc_ftello64:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
- return Changed;
+ break;
case LibFunc_tmpfile64:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setRetDoesNotAlias(F);
- return Changed;
+ break;
case LibFunc_fstat64:
case LibFunc_fstatvfs64:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_open64:
// May throw; "open" is a valid pthread cancellation point.
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setOnlyReadsMemory(F, 0);
- return Changed;
+ break;
case LibFunc_gettimeofday:
// Currently some platforms have the restrict keyword on the arguments to
// gettimeofday. To be conservative, do not add noalias to gettimeofday's
@@ -1074,7 +1071,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
- return Changed;
+ break;
case LibFunc_memset_pattern4:
case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
@@ -1089,18 +1086,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotThrow(F);
- return Changed;
+ break;
// int __nvvm_reflect(const char *)
case LibFunc_nvvm_reflect:
Changed |= setRetAndArgsNoUndef(F);
Changed |= setDoesNotAccessMemory(F);
Changed |= setDoesNotThrow(F);
- return Changed;
+ break;
case LibFunc_ldexp:
case LibFunc_ldexpf:
case LibFunc_ldexpl:
Changed |= setWillReturn(F);
- return Changed;
+ break;
case LibFunc_abs:
case LibFunc_acos:
case LibFunc_acosf:
@@ -1227,12 +1224,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setDoesNotFreeMemory(F);
Changed |= setOnlyWritesMemory(F);
Changed |= setWillReturn(F);
- return Changed;
+ break;
default:
// FIXME: It'd be really nice to cover all the library functions we're
// aware of here.
- return false;
+ break;
}
+ // We have to do this step after AllocKind has been inferred on functions so
+ // we can reliably identify free-like and realloc-like functions.
+ if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI))
+ Changed |= setDoesNotFreeMemory(F);
+ return Changed;
}
static void setArgExtAttr(Function &F, unsigned ArgNo,
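
Nearly every case in inferNonMandatoryLibFuncAttrs above switches from "return Changed;" to "break;" so that one final inference step (marking functions that are not free-like or realloc-like as not freeing memory) runs after the switch, once the allocation-kind attributes it depends on have been set. A minimal sketch of that control-flow pattern in plain C++ (illustrative names only, not the LLVM attribute API):

    // Sketch: replace per-case early returns with break so a post-switch step,
    // which depends on what the cases did, always runs.
    #include <iostream>
    #include <set>
    #include <string>

    bool inferAttrs(const std::string &Func, std::set<std::string> &Attrs) {
      bool Changed = false;
      switch (Func.front()) {          // stand-in for the LibFunc dispatch
      case 'm':                        // e.g. a malloc-like function
        Changed |= Attrs.insert("allockind(alloc)").second;
        break;                         // was: return Changed;
      case 'f':                        // e.g. a free-like function
        Changed |= Attrs.insert("allockind(free)").second;
        break;
      default:
        break;
      }
      // Runs after every case and can rely on the attributes inferred above.
      if (!Attrs.count("allockind(free)"))
        Changed |= Attrs.insert("nofree").second;
      return Changed;
    }

    int main() {
      std::set<std::string> A, B;
      inferAttrs("malloc", A);
      inferAttrs("free", B);
      std::cout << A.count("nofree") << ' ' << B.count("nofree") << '\n'; // 1 0
    }

With early returns, the shared step would silently be skipped for every recognized function, which is exactly what the trailing comment in the patch is guarding against.
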
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
index f229d4bf14e9..9101a1e41f7b 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -65,23 +65,6 @@ static bool canonicalizeAliases(Module &M) {
canonicalizeAlias(&GA, Changed);
return Changed;
}
-
-// Legacy pass that canonicalizes aliases.
-class CanonicalizeAliasesLegacyPass : public ModulePass {
-
-public:
- /// Pass identification, replacement for typeid
- static char ID;
-
- /// Specify pass name for debug output
- StringRef getPassName() const override { return "Canonicalize Aliases"; }
-
- explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {}
-
- bool runOnModule(Module &M) override { return canonicalizeAliases(M); }
-};
-char CanonicalizeAliasesLegacyPass::ID = 0;
-
} // anonymous namespace
PreservedAnalyses CanonicalizeAliasesPass::run(Module &M,
@@ -91,14 +74,3 @@ PreservedAnalyses CanonicalizeAliasesPass::run(Module &M,
return PreservedAnalyses::none();
}
-
-INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
- "Canonicalize aliases", false, false)
-INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases",
- "Canonicalize aliases", false, false)
-
-namespace llvm {
-ModulePass *createCanonicalizeAliasesPass() {
- return new CanonicalizeAliasesLegacyPass();
-}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 8f053cd56e0e..1d348213bfdb 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -206,9 +206,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
};
// Avoid cloning types, compile units, and (other) subprograms.
- for (DISubprogram *ISP : DIFinder->subprograms())
- if (ISP != SPClonedWithinModule)
+ SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs;
+ for (DISubprogram *ISP : DIFinder->subprograms()) {
+ if (ISP != SPClonedWithinModule) {
mapToSelfIfNew(ISP);
+ MappedToSelfSPs.insert(ISP);
+ }
+ }
+
+ // If a subprogram isn't going to be cloned, skip its lexical blocks as well.
+ for (DIScope *S : DIFinder->scopes()) {
+ auto *LScope = dyn_cast<DILocalScope>(S);
+ if (LScope && MappedToSelfSPs.count(LScope->getSubprogram()))
+ mapToSelfIfNew(S);
+ }
for (DICompileUnit *CU : DIFinder->compile_units())
mapToSelfIfNew(CU);
@@ -723,14 +734,14 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
// If the loops above have made these phi nodes have 0 or 1 operand,
- // replace them with undef or the input value. We must do this for
+ // replace them with poison or the input value. We must do this for
// correctness, because 0-operand phis are not valid.
PN = cast<PHINode>(NewBB->begin());
if (PN->getNumIncomingValues() == 0) {
BasicBlock::iterator I = NewBB->begin();
BasicBlock::const_iterator OldI = OldBB->begin();
while ((PN = dyn_cast<PHINode>(I++))) {
- Value *NV = UndefValue::get(PN->getType());
+ Value *NV = PoisonValue::get(PN->getType());
PN->replaceAllUsesWith(NV);
assert(VMap[&*OldI] == PN && "VMap mismatch");
VMap[&*OldI] = NV;
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index e3e8f63383df..60f910bceab8 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -117,65 +117,6 @@ static bool runOnFunction(Function &F, bool PostInlining) {
return Changed;
}
-namespace {
-struct EntryExitInstrumenter : public FunctionPass {
- static char ID;
- EntryExitInstrumenter() : FunctionPass(ID) {
- initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
- bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); }
-};
-char EntryExitInstrumenter::ID = 0;
-
-struct PostInlineEntryExitInstrumenter : public FunctionPass {
- static char ID;
- PostInlineEntryExitInstrumenter() : FunctionPass(ID) {
- initializePostInlineEntryExitInstrumenterPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
- bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); }
-};
-char PostInlineEntryExitInstrumenter::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(
- EntryExitInstrumenter, "ee-instrument",
- "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(
- EntryExitInstrumenter, "ee-instrument",
- "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)",
- false, false)
-
-INITIALIZE_PASS_BEGIN(
- PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
- "Instrument function entry/exit with calls to e.g. mcount() "
- "(post inlining)",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(
- PostInlineEntryExitInstrumenter, "post-inline-ee-instrument",
- "Instrument function entry/exit with calls to e.g. mcount() "
- "(post inlining)",
- false, false)
-
-FunctionPass *llvm::createEntryExitInstrumenterPass() {
- return new EntryExitInstrumenter();
-}
-
-FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() {
- return new PostInlineEntryExitInstrumenter();
-}
-
PreservedAnalyses
llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) {
runOnFunction(F, PostInlining);
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index 7b8d8553bac2..7509fde6df9d 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -301,9 +301,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (!SI->isSimple()) {
- LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
- return false; // no volatile/atomic accesses.
+ if (SI->isVolatile()) {
+ LLVM_DEBUG(dbgs() << "Store is volatile! Can not evaluate.\n");
+ return false; // no volatile accesses.
}
Constant *Ptr = getVal(SI->getOperand(1));
Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI);
@@ -337,10 +337,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
if (!Res.first->second.write(Val, Offset, DL))
return false;
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
- if (!LI->isSimple()) {
+ if (LI->isVolatile()) {
LLVM_DEBUG(
- dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
- return false; // no volatile/atomic accesses.
+ dbgs() << "Found a Load! Volatile load, can not evaluate.\n");
+ return false; // no volatile accesses.
}
Constant *Ptr = getVal(LI->getOperand(0));
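
The Evaluator hunks above relax the check from isSimple(), which rejects both volatile and atomic accesses, to isVolatile(), so unordered atomic loads and stores of constants can now be evaluated. A tiny plain-C++ sketch of the predicate narrowing (illustrative, not the LLVM instruction API):

    // Sketch: narrow a rejection predicate from "volatile or atomic" to
    // "volatile only".
    #include <iostream>

    struct Access { bool Volatile; bool Atomic; };

    bool rejectedBefore(const Access &A) { return A.Volatile || A.Atomic; } // !isSimple()
    bool rejectedNow(const Access &A) { return A.Volatile; }                // isVolatile()

    int main() {
      Access UnorderedAtomicLoad{false, true};
      std::cout << rejectedBefore(UnorderedAtomicLoad) << ' '
                << rejectedNow(UnorderedAtomicLoad) << '\n';  // 1 0
    }
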
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 193806d9cc87..8e6d4626c9fd 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -35,6 +35,13 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition(
bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
const GlobalValue *SGV, ValueInfo VI) {
assert(SGV->hasLocalLinkage());
+
+ // Ifuncs and ifunc aliases do not have a summary.
+ if (isa<GlobalIFunc>(SGV) ||
+ (isa<GlobalAlias>(SGV) &&
+ isa<GlobalIFunc>(cast<GlobalAlias>(SGV)->getAliaseeObject())))
+ return false;
+
// Both the imported references and the original local variable must
// be promoted.
if (!isPerformingImport() && !isModuleExporting())
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 2fb00f95b749..00387ec426bf 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2194,9 +2194,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
CI->setTailCallKind(ChildTCK);
InlinedMustTailCalls |= CI->isMustTailCall();
- // Calls inlined through a 'nounwind' call site should be marked
- // 'nounwind'.
- if (MarkNoUnwind)
+ // Call sites inlined through a 'nounwind' call site should be
+ // 'nounwind' as well. However, avoid marking call sites explicitly
+ // where possible. This helps expose more opportunities for CSE after
+ // inlining, commonly when the callee is an intrinsic.
+ if (MarkNoUnwind && !CI->doesNotThrow())
CI->setDoesNotThrow();
}
}
@@ -2625,7 +2627,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
} else if (!CB.use_empty()) {
// No returns, but something is using the return value of the call. Just
// nuke the result.
- CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
+ CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
}
// Since we are now done with the Call/Invoke, we can delete it.
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index b203259db1c6..2f1d0c2f9012 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -439,6 +439,10 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
return true;
}
+ if (auto *CB = dyn_cast<CallBase>(I))
+ if (isRemovableAlloc(CB, TLI))
+ return true;
+
if (!I->willReturn())
return false;
@@ -489,16 +493,13 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
}
}
- if (isAllocationFn(I, TLI) && isAllocRemovable(cast<CallBase>(I), TLI))
- return true;
-
- if (CallInst *CI = isFreeCall(I, TLI))
- if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
- return C->isNullValue() || isa<UndefValue>(C);
-
- if (auto *Call = dyn_cast<CallBase>(I))
+ if (auto *Call = dyn_cast<CallBase>(I)) {
+ if (Value *FreedOp = getFreedOperand(Call, TLI))
+ if (Constant *C = dyn_cast<Constant>(FreedOp))
+ return C->isNullValue() || isa<UndefValue>(C);
if (isMathLibCallNoop(Call, TLI))
return true;
+ }
// Non-volatile atomic loads from constants can be removed.
if (auto *LI = dyn_cast<LoadInst>(I))
@@ -637,7 +638,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
// won't prove fruitful.
if (!Visited.insert(I).second) {
// Break the cycle and delete the instruction and its operands.
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
(void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU);
return true;
}
@@ -750,8 +751,8 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
// If BB has single-entry PHI nodes, fold them.
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
Value *NewVal = PN->getIncomingValue(0);
- // Replace self referencing PHI with undef, it must be dead.
- if (NewVal == PN) NewVal = UndefValue::get(PN->getType());
+ // Replace self-referencing PHI with poison; it must be dead.
+ if (NewVal == PN) NewVal = PoisonValue::get(PN->getType());
PN->replaceAllUsesWith(NewVal);
PN->eraseFromParent();
}
@@ -2105,7 +2106,7 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
// Delete the next to last instruction.
Instruction *Inst = &*--EndInst->getIterator();
if (!Inst->use_empty() && !Inst->getType()->isTokenTy())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
if (Inst->isEHPad() || Inst->getType()->isTokenTy()) {
EndInst = Inst;
continue;
@@ -2144,7 +2145,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA,
BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
while (BBI != BBE) {
if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
BB->getInstList().erase(BBI++);
++NumInstrsRemoved;
}
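
The Local.cpp hunk above folds the free-call handling into the CallBase path via getFreedOperand and keeps treating a call whose freed operand is a null (or undef) constant as trivially dead, since freeing a null pointer is defined to do nothing. A short plain-C++ illustration of that fact (names are illustrative):

    // Sketch: free(nullptr) is a no-op by the C/C++ standard, so a free call
    // whose operand is a known null constant does no observable work and can
    // be deleted as trivially dead.
    #include <cstdlib>

    bool isTriviallyDeadFree(void *FreedOperand) {
      return FreedOperand == nullptr;  // nothing to release
    }

    int main() {
      void *P = std::malloc(16);
      std::free(P);        // real work: must stay
      std::free(nullptr);  // no-op: a compiler may delete this call
      return isTriviallyDeadFree(nullptr) ? 0 : 1;
    }
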
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 0f33559c7e70..597c88ad13df 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -622,7 +622,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// We only need to split loop exit edges.
Loop *PredLoop = LI->getLoopFor(ExitPred);
if (!PredLoop || PredLoop->contains(Exit) ||
- ExitPred->getTerminator()->isIndirectTerminator())
+ isa<IndirectBrInst>(ExitPred->getTerminator()))
continue;
SplitLatchEdge |= L->getLoopLatch() == ExitPred;
BasicBlock *ExitSplit = SplitCriticalEdge(
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 55d5c733733b..2ff8a3f7b228 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -127,7 +127,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// If the loop is branched to from an indirect terminator, we won't
// be able to fully transform the loop, because it prohibits
// edge splitting.
- if (P->getTerminator()->isIndirectTerminator())
+ if (isa<IndirectBrInst>(P->getTerminator()))
return nullptr;
// Keep track of it.
@@ -256,7 +256,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (PN->getIncomingValue(i) != PN ||
!L->contains(PN->getIncomingBlock(i))) {
// We can't split indirect control flow edges.
- if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator())
+ if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
return nullptr;
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
@@ -375,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
std::vector<BasicBlock*> BackedgeBlocks;
for (BasicBlock *P : predecessors(Header)) {
// Indirect edges cannot be split, so we must fail if we find one.
- if (P->getTerminator()->isIndirectTerminator())
+ if (isa<IndirectBrInst>(P->getTerminator()))
return nullptr;
if (P != Preheader) BackedgeBlocks.push_back(P);
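These LoopSimplify hunks (like the LoopRotationUtils one above) replace the removed Instruction::isIndirectTerminator() predicate with an explicit isa<IndirectBrInst> check on the predecessor's terminator. A sketch of the check in isolation, assuming LLVM headers; the helper name is illustrative only:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // An indirectbr terminator means the outgoing edges cannot be split,
  // so the loop transform has to bail out for this predecessor.
  static bool hasUnsplittableIndirectBranch(const BasicBlock *Pred) {
    return isa<IndirectBrInst>(Pred->getTerminator());
  }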
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 82f993b4ceab..349063dd5e89 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -602,10 +602,10 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
// loop will already be eliminated and we have less work to do, but according
// to the API doc of User::dropAllReferences the only valid operation after
// dropping references is deletion. So let's substitute all usages of
- // instruction from the loop with undef value of corresponding type first.
+ // instruction from the loop with poison value of corresponding type first.
for (auto *Block : L->blocks())
for (Instruction &I : *Block) {
- auto *Undef = UndefValue::get(I.getType());
+ auto *Poison = PoisonValue::get(I.getType());
for (Use &U : llvm::make_early_inc_range(I.uses())) {
if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
if (L->contains(Usr->getParent()))
@@ -615,7 +615,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
if (DT)
assert(!DT->isReachableFromEntry(U) &&
"Unexpected user in reachable block");
- U.set(Undef);
+ U.set(Poison);
}
auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
if (!DVI)
@@ -1357,7 +1357,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
if (isa<SCEVCouldNotCompute>(ExitValue) ||
!SE->isLoopInvariant(ExitValue, L) ||
- !isSafeToExpand(ExitValue, *SE)) {
+ !Rewriter.isSafeToExpand(ExitValue)) {
// TODO: This should probably be sunk into SCEV in some way; maybe a
// getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
// most SCEV expressions and other recurrence types (e.g. shift
@@ -1370,7 +1370,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
if (isa<SCEVCouldNotCompute>(ExitValue) ||
!SE->isLoopInvariant(ExitValue, L) ||
- !isSafeToExpand(ExitValue, *SE))
+ !Rewriter.isSafeToExpand(ExitValue))
continue;
}
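Both rewriteLoopExitValues call sites now ask the expander itself whether an expression is safe to expand; the free functions isSafeToExpand/isSafeToExpandAt become SCEVExpander members, as the ScalarEvolutionExpander.cpp diff further below shows. A sketch of the new call-site shape, assuming LLVM headers and an expander instance already in hand:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
  using namespace llvm;

  // Illustrative only: the safety query is a method on the expander that
  // will perform the expansion, not a free function taking SE.
  static bool canExpandExitValue(SCEVExpander &Rewriter, const SCEV *ExitValue) {
    return Rewriter.isSafeToExpand(ExitValue);
  }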
diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
index 9914a5ca6c5e..2247b8107739 100644
--- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -31,7 +31,7 @@ bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
- Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
+ Res = Builder.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0);
Res = Builder.CreateInsertValue(Res, Equal, 1);
CXI->replaceAllUsesWith(Res);
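For context, this cmpxchg lowering rewrites the atomic into a plain load/compare/select/store sequence and then rebuilds the instruction's {old value, success flag} aggregate result; the change above only swaps the aggregate's starting value from undef to poison. A condensed sketch, assuming LLVM headers and ignoring ordering and volatility details:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // B is assumed to be positioned at CXI.
  static void lowerCmpXchgSketch(AtomicCmpXchgInst *CXI, IRBuilder<> &B) {
    Value *Ptr = CXI->getPointerOperand();
    Value *Cmp = CXI->getCompareOperand();
    Value *Val = CXI->getNewValOperand();
    Value *Orig = B.CreateLoad(Val->getType(), Ptr);
    Value *Equal = B.CreateICmpEQ(Orig, Cmp);
    B.CreateStore(B.CreateSelect(Equal, Val, Orig), Ptr);
    // The {old value, success} result now starts from poison, not undef.
    Value *Res = B.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0);
    Res = B.CreateInsertValue(Res, Equal, 1);
    CXI->replaceAllUsesWith(Res);
  }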
diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index deaee467531d..d4ab4504064f 100644
--- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -81,26 +81,6 @@ bool llvm::nameUnamedGlobals(Module &M) {
return Changed;
}
-namespace {
-
-// Legacy pass that provides a name to every anon globals.
-class NameAnonGlobalLegacyPass : public ModulePass {
-
-public:
- /// Pass identification, replacement for typeid
- static char ID;
-
- /// Specify pass name for debug output
- StringRef getPassName() const override { return "Name Anon Globals"; }
-
- explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {}
-
- bool runOnModule(Module &M) override { return nameUnamedGlobals(M); }
-};
-char NameAnonGlobalLegacyPass::ID = 0;
-
-} // anonymous namespace
-
PreservedAnalyses NameAnonGlobalPass::run(Module &M,
ModuleAnalysisManager &AM) {
if (!nameUnamedGlobals(M))
@@ -108,14 +88,3 @@ PreservedAnalyses NameAnonGlobalPass::run(Module &M,
return PreservedAnalyses::none();
}
-
-INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals",
- "Provide a name to nameless globals", false, false)
-INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals",
- "Provide a name to nameless globals", false, false)
-
-namespace llvm {
-ModulePass *createNameAnonGlobalPass() {
- return new NameAnonGlobalLegacyPass();
-}
-}
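With the legacy wrapper deleted, the functionality stays reachable only through the new pass manager (or directly via the retained nameUnamedGlobals() utility). A sketch of scheduling it, assuming the standard new-PM headers:

  #include "llvm/IR/PassManager.h"
  #include "llvm/Transforms/Utils/NameAnonGlobals.h"
  using namespace llvm;

  // Illustrative scheduling of the remaining new-PM pass.
  static void addNameAnonGlobals(ModulePassManager &MPM) {
    MPM.addPass(NameAnonGlobalPass());
  }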
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 0c8bf3827256..372cd74ea01d 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2568,9 +2568,7 @@ namespace {
// only needed when the expression includes some subexpression that is not IV
// derived.
//
-// Currently, we only allow division by a nonzero constant here. If this is
-// inadequate, we could easily allow division by SCEVUnknown by using
-// ValueTracking to check isKnownNonZero().
+// Currently, we only allow division by a value provably non-zero here.
//
// We cannot generally expand recurrences unless the step dominates the loop
// header. The expander handles the special case of affine recurrences by
@@ -2588,8 +2586,7 @@ struct SCEVFindUnsafe {
bool follow(const SCEV *S) {
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
- if (!SC || SC->getValue()->isZero()) {
+ if (!SE.isKnownNonZero(D->getRHS())) {
IsUnsafe = true;
return false;
}
@@ -2613,18 +2610,17 @@ struct SCEVFindUnsafe {
}
bool isDone() const { return IsUnsafe; }
};
-}
+} // namespace
-namespace llvm {
-bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode) {
+bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
SCEVFindUnsafe Search(SE, CanonicalMode);
visitAll(S, Search);
return !Search.IsUnsafe;
}
-bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
- ScalarEvolution &SE) {
- if (!isSafeToExpand(S, SE))
+bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
+ const Instruction *InsertionPoint) const {
+ if (!isSafeToExpand(S))
return false;
// We have to prove that the expanded site of S dominates InsertionPoint.
// This is easy when not in the same block, but hard when S is an instruction
@@ -2674,4 +2670,3 @@ void SCEVExpanderCleaner::cleanup() {
I->eraseFromParent();
}
}
-}
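The SCEVFindUnsafe change widens the divisor test from "non-zero constant" to "provably non-zero", matching the updated comment. A sketch of just that test, assuming the ScalarEvolution headers:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  using namespace llvm;

  // Any divisor SCEV that ScalarEvolution can prove non-zero is acceptable,
  // not just non-zero constants.
  static bool udivIsSafeToExpand(const SCEVUDivExpr *D, ScalarEvolution &SE) {
    return SE.isKnownNonZero(D->getRHS());
  }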
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 4b5ade99767b..1806081678a8 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -4851,7 +4851,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
PN.moveBefore(InsertPt);
// Also, add a dummy incoming value for the original BB itself,
// so that the PHI is well-formed until we drop said predecessor.
- PN.addIncoming(UndefValue::get(PN.getType()), BB);
+ PN.addIncoming(PoisonValue::get(PN.getType()), BB);
}
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index af15e0c31b75..0ab79a32f526 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -593,7 +593,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
}
// Trunc no longer needed.
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ TI->replaceAllUsesWith(PoisonValue::get(TI->getType()));
DeadInsts.emplace_back(TI);
return true;
}
@@ -660,7 +660,7 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
auto *IP = GetLoopInvariantInsertPosition(L, I);
- if (!isSafeToExpandAt(S, IP, *SE)) {
+ if (!Rewriter.isSafeToExpandAt(S, IP)) {
LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I
<< " with non-speculable loop invariant: " << *S << '\n');
return false;
@@ -679,20 +679,30 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
/// Eliminate redundant type cast between integer and float.
bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
- if (UseInst->getOpcode() != CastInst::SIToFP)
+ if (UseInst->getOpcode() != CastInst::SIToFP &&
+ UseInst->getOpcode() != CastInst::UIToFP)
return false;
Value *IVOperand = UseInst->getOperand(0);
// Get the symbolic expression for this instruction.
- ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand));
+ const SCEV *IV = SE->getSCEV(IVOperand);
+ unsigned MaskBits;
+ if (UseInst->getOpcode() == CastInst::SIToFP)
+ MaskBits = SE->getSignedRange(IV).getMinSignedBits();
+ else
+ MaskBits = SE->getUnsignedRange(IV).getActiveBits();
unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
- if (IVRange.getActiveBits() <= DestNumSigBits) {
+ if (MaskBits <= DestNumSigBits) {
for (User *U : UseInst->users()) {
- // Match for fptosi of sitofp and with same type.
- auto *CI = dyn_cast<FPToSIInst>(U);
+ // Match for fptosi/fptoui of sitofp and with same type.
+ auto *CI = dyn_cast<CastInst>(U);
if (!CI || IVOperand->getType() != CI->getType())
continue;
+ CastInst::CastOps Opcode = CI->getOpcode();
+ if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI)
+ continue;
+
CI->replaceAllUsesWith(IVOperand);
DeadInsts.push_back(CI);
LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
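The replaceFloatIVWithIntegerIV hunk above generalizes the sitofp-only handling to uitofp, picking the bit-width test from the matching signed or unsigned range. A sketch of that test on its own, assuming LLVM headers; the helper name is only for illustration:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // The int-to-FP cast is redundant when the IV's range fits in the
  // destination's mantissa: signed range for sitofp, unsigned for uitofp.
  static bool intToFPCastIsLossless(CastInst *UseInst, ScalarEvolution *SE) {
    const SCEV *IV = SE->getSCEV(UseInst->getOperand(0));
    unsigned MaskBits = UseInst->getOpcode() == CastInst::SIToFP
                            ? SE->getSignedRange(IV).getMinSignedBits()
                            : SE->getUnsignedRange(IV).getActiveBits();
    unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
    return MaskBits <= DestNumSigBits;
  }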
@@ -1015,7 +1025,7 @@ class WidenIV {
SmallPtrSet<Instruction *,16> Widened;
- enum ExtendKind { ZeroExtended, SignExtended, Unknown };
+ enum class ExtendKind { Zero, Sign, Unknown };
// A map tracking the kind of extension used to widen each narrow IV
// and narrow IV user.
@@ -1172,7 +1182,7 @@ WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges),
DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
- ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
+ ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero;
}
Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
@@ -1225,7 +1235,7 @@ Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) {
// about the narrow operand yet so must insert a [sz]ext. It is probably loop
// invariant and will be folded or hoisted. If it actually comes from a
// widened IV, it should be removed during a future call to widenIVUse.
- bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
+ bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign;
Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
? WideDef
: createExtendInst(NarrowUse->getOperand(0), WideType,
@@ -1290,7 +1300,7 @@ Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU,
return WideUse == WideAR;
};
- bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
+ bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign;
if (!GuessNonIVOperand(SignExtend)) {
SignExtend = !SignExtend;
if (!GuessNonIVOperand(SignExtend))
@@ -1350,7 +1360,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
// Only Add/Sub/Mul instructions supported yet.
if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
OpCode != Instruction::Mul)
- return {nullptr, Unknown};
+ return {nullptr, ExtendKind::Unknown};
// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.
@@ -1362,14 +1372,14 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
- if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
+ if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap())
ExtendOperExpr = SE->getSignExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
+ else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap())
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else
- return {nullptr, Unknown};
+ return {nullptr, ExtendKind::Unknown};
// When creating this SCEV expr, don't apply the current operations NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
@@ -1387,7 +1397,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
if (!AddRec || AddRec->getLoop() != L)
- return {nullptr, Unknown};
+ return {nullptr, ExtendKind::Unknown};
return {AddRec, ExtKind};
}
@@ -1396,17 +1406,17 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) {
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop. If
/// so, return the extended recurrence and the kind of extension used. Otherwise
-/// return {nullptr, Unknown}.
+/// return {nullptr, ExtendKind::Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
if (!DU.NarrowUse->getType()->isIntegerTy())
- return {nullptr, Unknown};
+ return {nullptr, ExtendKind::Unknown};
const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
SE->getTypeSizeInBits(WideType)) {
// NarrowUse implicitly widens its operand. e.g. a gep with a narrow
// index. So don't follow this use.
- return {nullptr, Unknown};
+ return {nullptr, ExtendKind::Unknown};
}
const SCEV *WideExpr;
@@ -1414,21 +1424,21 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) {
if (DU.NeverNegative) {
WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
if (isa<SCEVAddRecExpr>(WideExpr))
- ExtKind = SignExtended;
+ ExtKind = ExtendKind::Sign;
else {
WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
- ExtKind = ZeroExtended;
+ ExtKind = ExtendKind::Zero;
}
- } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
+ } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) {
WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
- ExtKind = SignExtended;
+ ExtKind = ExtendKind::Sign;
} else {
WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
- ExtKind = ZeroExtended;
+ ExtKind = ExtendKind::Zero;
}
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
- return {nullptr, Unknown};
+ return {nullptr, ExtendKind::Unknown};
return {AddRec, ExtKind};
}
@@ -1468,7 +1478,7 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) {
//
// (A) == icmp slt i32 sext(%narrow), sext(%val)
// == icmp slt i32 zext(%narrow), sext(%val)
- bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
+ bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
return false;
@@ -1533,8 +1543,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(NarrowUse);
ExtendKind ExtKind = getExtendKind(NarrowDef);
- bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
- bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
+ bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap();
+ bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap();
auto AnotherOpExtKind = ExtKind;
// Check that all uses are either:
@@ -1564,14 +1574,14 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
// predicates. For equality, it's legal to widen icmp for either sign or
// zero extend. For sign extend, we can also do so for signed predicates,
// likewise for zero extend we can widen icmp for unsigned predicates.
- if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
+ if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred))
return false;
- if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
+ if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred))
return false;
ICmpUsers.push_back(ICmp);
continue;
}
- if (ExtKind == SignExtended)
+ if (ExtKind == ExtendKind::Sign)
User = dyn_cast<SExtInst>(User);
else
User = dyn_cast<ZExtInst>(User);
@@ -1594,7 +1604,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
// will most likely not see it. Let's try to prove it.
if (OpCode != Instruction::Add)
return false;
- if (ExtKind != ZeroExtended)
+ if (ExtKind != ExtendKind::Zero)
return false;
const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
@@ -1609,7 +1619,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
return false;
// In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as
// neg(zext(neg(op))), which is basically sext(op).
- AnotherOpExtKind = SignExtended;
+ AnotherOpExtKind = ExtendKind::Sign;
}
// Verifying that Defining operand is an AddRec
@@ -1621,14 +1631,16 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
// Generating a widening use instruction.
- Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(0), WideType,
- AnotherOpExtKind, NarrowUse);
- Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
- ? WideDef
- : createExtendInst(NarrowUse->getOperand(1), WideType,
- AnotherOpExtKind, NarrowUse);
+ Value *LHS =
+ (NarrowUse->getOperand(0) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(0), WideType,
+ AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
+ Value *RHS =
+ (NarrowUse->getOperand(1) == NarrowDef)
+ ? WideDef
+ : createExtendInst(NarrowUse->getOperand(1), WideType,
+ AnotherOpExtKind == ExtendKind::Sign, NarrowUse);
auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
@@ -1667,7 +1679,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
auto ExtendedOp = [&](Value * V)->Value * {
if (V == NarrowUse)
return WideBO;
- if (ExtKind == ZeroExtended)
+ if (ExtKind == ExtendKind::Zero)
return Builder.CreateZExt(V, WideBO->getType());
else
return Builder.CreateSExt(V, WideBO->getType());
@@ -1723,10 +1735,10 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
// This narrow use can be widened by a sext if it's non-negative or its narrow
// def was widened by a sext. Same for zext.
auto canWidenBySExt = [&]() {
- return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign;
};
auto canWidenByZExt = [&]() {
- return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero;
};
// Our raison d'etre! Eliminate sign and zero extension.
@@ -1774,7 +1786,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri
if (!WideAddRec.first)
WideAddRec = getWideRecurrence(DU);
- assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
+ assert((WideAddRec.first == nullptr) ==
+ (WideAddRec.second == ExtendKind::Unknown));
if (!WideAddRec.first) {
// If use is a loop condition, try to promote the condition instead of
// truncating the IV first.
@@ -1869,7 +1882,7 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
return nullptr;
// Widen the induction variable expression.
- const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
+ const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign
? SE->getSignExtendExpr(AddRec, WideType)
: SE->getZeroExtendExpr(AddRec, WideType);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b359717424a6..bca3b0538c5d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1948,14 +1948,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
+ // If we can approximate pow:
// pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
+ // pow(x, n) -> powi(x, n) if n is a constant signed integer value
const APFloat *ExpoF;
- if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) &&
- !ExpoF->isExactlyValue(-0.5)) {
+ if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
+ !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
APFloat ExpoA(abs(*ExpoF));
APFloat ExpoI(*ExpoF);
Value *Sqrt = nullptr;
- if (AllowApprox && !ExpoA.isInteger()) {
+ if (!ExpoA.isInteger()) {
APFloat Expo2 = ExpoA;
// To check if ExpoA is an integer + 0.5, we add it to itself. If there
// is no floating point exception and the result is an integer, then
@@ -1979,7 +1981,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
return nullptr;
}
- // pow(x, n) -> powi(x, n) if n is a constant signed integer value
+ // The 0.5-fraction case is handled above (only when approximation is allowed).
+ // Do pow -> powi for the remaining integer exponents.
APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
if (ExpoF->isInteger() &&
ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
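The optimizePow change moves the entire 0.5-fraction handling behind the approximation check, so both the powi*sqrt split and the plain powi rewrite for constant exponents now require AllowApprox. A small standalone numeric illustration of the identity being exploited (deliberately plain C++ with libm calls, not LLVM code):

  #include <cmath>
  #include <cstdio>

  // pow(x, n + 0.5) == powi(x, n) * sqrt(x); std::pow with an integer
  // exponent stands in for powi here.
  int main() {
    double X = 1.7;
    double Direct = std::pow(X, 3.5);
    double Split = std::pow(X, 3.0) * std::sqrt(X);
    std::printf("%.17g vs %.17g\n", Direct, Split);
    return 0;
  }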
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 832353741500..9bbfe06b9abb 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -145,8 +145,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
// locate the exit blocks.
SetVector<BasicBlock *> ExitingBlocks;
SetVector<BasicBlock *> Exits;
- // Record the exit blocks that branch to the same block.
- MapVector<BasicBlock *, SetVector<BasicBlock *> > CommonSuccs;
// We need SetVectors, but the Loop API takes a vector, so we use a temporary.
SmallVector<BasicBlock *, 8> Temp;
@@ -160,11 +158,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
if (SL == L || L->contains(SL))
continue;
Exits.insert(S);
- // The typical case for reducing the number of guard blocks occurs when
- // the exit block has a single predecessor and successor.
- if (S->getSinglePredecessor())
- if (auto *Succ = S->getSingleSuccessor())
- CommonSuccs[Succ].insert(S);
}
}
@@ -179,39 +172,13 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
for (auto EB : ExitingBlocks) {
dbgs() << " " << EB->getName();
}
- dbgs() << "\n";
-
- dbgs() << "Exit blocks with a common successor:\n";
- for (auto CS : CommonSuccs) {
- dbgs() << " Succ " << CS.first->getName() << ", exits:";
- for (auto Exit : CS.second)
- dbgs() << " " << Exit->getName();
- dbgs() << "\n";
- });
+ dbgs() << "\n";);
if (Exits.size() <= 1) {
LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n");
return false;
}
- // When multiple exit blocks branch to the same block, change the control
- // flow hub to after the exit blocks rather than before. This reduces the
- // number of guard blocks needed after the loop.
- for (auto CS : CommonSuccs) {
- auto CB = CS.first;
- auto Preds = CS.second;
- if (Exits.contains(CB))
- continue;
- if (Preds.size() < 2 || Preds.size() == Exits.size())
- continue;
- for (auto Exit : Preds) {
- Exits.remove(Exit);
- ExitingBlocks.remove(Exit->getSinglePredecessor());
- ExitingBlocks.insert(Exit);
- }
- Exits.insert(CB);
- }
-
SmallVector<BasicBlock *, 8> GuardBlocks;
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks,
@@ -231,17 +198,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
if (auto ParentLoop = L->getParentLoop()) {
for (auto G : GuardBlocks) {
ParentLoop->addBasicBlockToLoop(G, LI);
- // Ensure the guard block predecessors are in a valid loop. After the
- // change to the control flow hub for common successors, a guard block
- // predecessor may not be in a loop or may be in an outer loop.
- for (auto Pred : predecessors(G)) {
- auto PredLoop = LI.getLoopFor(Pred);
- if (!ParentLoop->contains(PredLoop)) {
- if (PredLoop)
- LI.removeBlock(Pred);
- ParentLoop->addBasicBlockToLoop(Pred, LI);
- }
- }
}
ParentLoop->verifyLoop();
}
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index f34f2df971b1..d002922cfd30 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -28,7 +28,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeAssumeSimplifyPassLegacyPassPass(Registry);
initializeAssumeBuilderPassLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
- initializeCanonicalizeAliasesLegacyPassPass(Registry);
initializeCanonicalizeFreezeInLoopsPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
@@ -37,7 +36,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeLowerGlobalDtorsLegacyPassPass(Registry);
initializeLowerInvokeLegacyPassPass(Registry);
initializeLowerSwitchLegacyPassPass(Registry);
- initializeNameAnonGlobalLegacyPassPass(Registry);
initializePromoteLegacyPassPass(Registry);
initializeStripNonLineTableDebugLegacyPassPass(Registry);
initializeUnifyFunctionExitNodesLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0777a1385916..b887ea41676b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -92,6 +92,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -473,7 +474,7 @@ public:
virtual std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton();
/// Widen a single call instruction within the innermost loop.
- void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands,
+ void widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands,
VPTransformState &State);
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
@@ -1447,15 +1448,14 @@ public:
// through scalar predication or masked load/store or masked gather/scatter.
// \p VF is the vectorization factor that will be used to vectorize \p I.
// Superset of instructions that return true for isScalarWithPredication.
- bool isPredicatedInst(Instruction *I, ElementCount VF,
- bool IsKnownUniform = false) {
- // When we know the load is uniform and the original scalar loop was not
- // predicated we don't need to mark it as a predicated instruction. Any
- // vectorised blocks created when tail-folding are something artificial we
- // have introduced and we know there is always at least one active lane.
- // That's why we call Legal->blockNeedsPredication here because it doesn't
- // query tail-folding.
- if (IsKnownUniform && isa<LoadInst>(I) &&
+ bool isPredicatedInst(Instruction *I, ElementCount VF) {
+ // When we know the load's address is loop invariant and the instruction
+ // in the original scalar loop was unconditionally executed then we
+ // don't need to mark it as a predicated instruction. Tail folding may
+ // introduce additional predication, but we're guaranteed to always have
+ // at least one active lane. We call Legal->blockNeedsPredication here
+ // because it doesn't query tail-folding.
+ if (Legal->isUniformMemOp(*I) && isa<LoadInst>(I) &&
!Legal->blockNeedsPredication(I->getParent()))
return false;
if (!blockNeedsPredicationForAnyReason(I->getParent()))
@@ -1657,10 +1657,6 @@ private:
InstructionCost getScalarizationOverhead(Instruction *I,
ElementCount VF) const;
- /// Returns whether the instruction is a load or store and will be a emitted
- /// as a vector operation.
- bool isConsecutiveLoadOrStore(Instruction *I);
-
/// Returns true if an artificially high cost for emulated masked memrefs
/// should be used.
bool useEmulatedMaskMemRefHack(Instruction *I, ElementCount VF);
@@ -1919,10 +1915,13 @@ public:
auto DiffChecks = RtPtrChecking.getDiffChecks();
if (DiffChecks) {
+ Value *RuntimeVF = nullptr;
MemRuntimeCheckCond = addDiffRuntimeChecks(
MemCheckBlock->getTerminator(), L, *DiffChecks, MemCheckExp,
- [VF](IRBuilderBase &B, unsigned Bits) {
- return getRuntimeVF(B, B.getIntNTy(Bits), VF);
+ [VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) {
+ if (!RuntimeVF)
+ RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF);
+ return RuntimeVF;
},
IC);
} else {
@@ -2947,11 +2946,17 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// If tail is to be folded, vector loop takes care of all iterations.
Type *CountTy = Count->getType();
Value *CheckMinIters = Builder.getFalse();
- auto CreateStep = [&]() {
+ auto CreateStep = [&]() -> Value * {
// Create step with max(MinProTripCount, UF * VF).
- if (UF * VF.getKnownMinValue() < MinProfitableTripCount.getKnownMinValue())
- return createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
- return createStepForVF(Builder, CountTy, VF, UF);
+ if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue())
+ return createStepForVF(Builder, CountTy, VF, UF);
+
+ Value *MinProfTC =
+ createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
+ if (!VF.isScalable())
+ return MinProfTC;
+ return Builder.CreateBinaryIntrinsic(
+ Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF));
};
if (!Cost->foldTailByMasking())
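The reworked CreateStep lambda returns the VF * UF step when it already meets the profitability bound; otherwise, a fixed VF can return MinProfitableTripCount directly, while a scalable VF must take the maximum at run time. A sketch of that runtime clamp, assuming LLVM headers; the names are illustrative:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"
  using namespace llvm;

  // With a scalable VF the step VF * UF is not a compile-time constant, so
  // the clamp against MinProfitableTripCount is emitted as a umax intrinsic.
  static Value *clampStep(IRBuilderBase &B, Value *MinProfTC, Value *VFxUF) {
    return B.CreateBinaryIntrinsic(Intrinsic::umax, MinProfTC, VFxUF);
  }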
@@ -4168,46 +4173,26 @@ bool InnerLoopVectorizer::useOrderedReductions(
return Cost->useOrderedReductions(RdxDesc);
}
-/// A helper function for checking whether an integer division-related
-/// instruction may divide by zero (in which case it must be predicated if
-/// executed conditionally in the scalar code).
-/// TODO: It may be worthwhile to generalize and check isKnownNonZero().
-/// Non-zero divisors that are non compile-time constants will not be
-/// converted into multiplication, so we will still end up scalarizing
-/// the division, but can do so w/o predication.
-static bool mayDivideByZero(Instruction &I) {
- assert((I.getOpcode() == Instruction::UDiv ||
- I.getOpcode() == Instruction::SDiv ||
- I.getOpcode() == Instruction::URem ||
- I.getOpcode() == Instruction::SRem) &&
- "Unexpected instruction");
- Value *Divisor = I.getOperand(1);
- auto *CInt = dyn_cast<ConstantInt>(Divisor);
- return !CInt || CInt->isZero();
-}
-
-void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
+void InnerLoopVectorizer::widenCallInstruction(CallInst &CI, VPValue *Def,
VPUser &ArgOperands,
VPTransformState &State) {
- assert(!isa<DbgInfoIntrinsic>(I) &&
+ assert(!isa<DbgInfoIntrinsic>(CI) &&
"DbgInfoIntrinsic should have been dropped during VPlan construction");
- State.setDebugLocFromInst(&I);
-
- Module *M = I.getParent()->getParent()->getParent();
- auto *CI = cast<CallInst>(&I);
+ State.setDebugLocFromInst(&CI);
SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI->args())
+ for (Value *ArgOperand : CI.args())
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue()));
- Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, TLI);
// The flag shows whether we use Intrinsic or a usual Call for vectorized
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize = false;
- InstructionCost CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
- InstructionCost IntrinsicCost = ID ? Cost->getVectorIntrinsicCost(CI, VF) : 0;
+ InstructionCost CallCost = Cost->getVectorCallCost(&CI, VF, NeedToScalarize);
+ InstructionCost IntrinsicCost =
+ ID ? Cost->getVectorIntrinsicCost(&CI, VF) : 0;
bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost;
assert((UseVectorIntrinsic || !NeedToScalarize) &&
"Instruction should be scalarized elsewhere.");
@@ -4215,7 +4200,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
"Either the intrinsic cost or vector call cost must be valid");
for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Type *, 2> TysForDecl = {CI->getType()};
+ SmallVector<Type *, 2> TysForDecl = {CI.getType()};
SmallVector<Value *, 4> Args;
for (auto &I : enumerate(ArgOperands.operands())) {
// Some intrinsics have a scalar argument - don't replace it with a
@@ -4235,27 +4220,28 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
if (UseVectorIntrinsic) {
// Use vector version of the intrinsic.
if (VF.isVector())
- TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ TysForDecl[0] = VectorType::get(CI.getType()->getScalarType(), VF);
+ Module *M = State.Builder.GetInsertBlock()->getModule();
VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");
} else {
// Use vector version of the function call.
- const VFShape Shape = VFShape::get(*CI, VF, false /*HasGlobalPred*/);
+ const VFShape Shape = VFShape::get(CI, VF, false /*HasGlobalPred*/);
#ifndef NDEBUG
- assert(VFDatabase(*CI).getVectorizedFunction(Shape) != nullptr &&
+ assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr &&
"Can't create vector function.");
#endif
- VectorF = VFDatabase(*CI).getVectorizedFunction(Shape);
+ VectorF = VFDatabase(CI).getVectorizedFunction(Shape);
}
SmallVector<OperandBundleDef, 1> OpBundles;
- CI->getOperandBundlesAsDefs(OpBundles);
+ CI.getOperandBundlesAsDefs(OpBundles);
CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles);
if (isa<FPMathOperator>(V))
- V->copyFastMathFlags(CI);
+ V->copyFastMathFlags(&CI);
State.set(Def, V, Part);
- State.addMetadata(V, &I);
+ State.addMetadata(V, &CI);
}
}
@@ -4470,7 +4456,9 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
- return mayDivideByZero(*I);
+ // TODO: We can use the loop preheader as the context point here and get
+ // context-sensitive reasoning.
+ return !isSafeToSpeculativelyExecute(I);
}
return false;
}
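Here the bespoke mayDivideByZero() helper is replaced with the generic speculation query from ValueTracking, which handles division by zero and related undefined-behavior cases conservatively. A sketch of the new test, assuming LLVM headers:

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // A div/rem needs predication exactly when it cannot be speculated,
  // e.g. when its divisor is not known to be non-zero.
  static bool divRemNeedsPredication(const Instruction *I) {
    return !isSafeToSpeculativelyExecute(I);
  }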
@@ -5406,7 +5394,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
}
LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
- ChosenFactor.Cost >= ScalarCost.Cost) dbgs()
+ !isMoreProfitable(ChosenFactor, ScalarCost)) dbgs()
<< "LV: Vectorization seems to be not beneficial, "
<< "but was forced by a user.\n");
LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n");
@@ -6069,7 +6057,8 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert(isPredicatedInst(I, VF) && "Expecting a scalar emulated instruction");
+ assert((isPredicatedInst(I, VF) || Legal->isUniformMemOp(*I)) &&
+ "Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
(isa<StoreInst>(I) &&
NumPredStores > NumberOfStoresToPredicate);
@@ -6779,19 +6768,29 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
NumPredStores++;
if (Legal->isUniformMemOp(I)) {
- // TODO: Avoid replicating loads and stores instead of
- // relying on instcombine to remove them.
+ // Lowering story for uniform memory ops is currently a bit complicated.
+ // Scalarization works for everything which isn't a store with scalable
+ // VF. Fixed len VFs just scalarize and then DCE later; scalarization
+ // knows how to handle uniform-per-part values (i.e. the first lane
+ // in each unrolled VF) and can thus handle scalable loads too. For
+ // scalable stores, we use a scatter if legal. If not, we have no way
+ // to lower (currently) and thus have to abort vectorization.
+ if (isa<StoreInst>(&I) && VF.isScalable()) {
+ if (isLegalGatherOrScatter(&I, VF))
+ setWideningDecision(&I, VF, CM_GatherScatter,
+ getGatherScatterCost(&I, VF));
+ else
+ // Error case, abort vectorization
+ setWideningDecision(&I, VF, CM_Scalarize,
+ InstructionCost::getInvalid());
+ continue;
+ }
// Load: Scalar load + broadcast
// Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract
- InstructionCost Cost;
- if (isa<StoreInst>(&I) && VF.isScalable() &&
- isLegalGatherOrScatter(&I, VF)) {
- Cost = getGatherScatterCost(&I, VF);
- setWideningDecision(&I, VF, CM_GatherScatter, Cost);
- } else {
- Cost = getUniformMemOpCost(&I, VF);
- setWideningDecision(&I, VF, CM_Scalarize, Cost);
- }
+ // TODO: Avoid replicating loads and stores instead of relying on
+ // instcombine to remove them.
+ setWideningDecision(&I, VF, CM_Scalarize,
+ getUniformMemOpCost(&I, VF));
continue;
}
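The new comment spells out the lowering story for uniform memory ops, and the code beneath it implements exactly that decision. A distilled sketch, with hypothetical parameters standing in for the cost-model queries:

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // IsScalableVF and CanUseScatter stand in for VF.isScalable() and
  // isLegalGatherOrScatter(); the Abort case is what the invalid
  // InstructionCost encodes in the real code.
  enum class UniformMemOpLowering { Scalarize, Scatter, Abort };

  static UniformMemOpLowering classifyUniformMemOp(const Instruction &I,
                                                   bool IsScalableVF,
                                                   bool CanUseScatter) {
    if (isa<StoreInst>(&I) && IsScalableVF)
      return CanUseScatter ? UniformMemOpLowering::Scatter
                           : UniformMemOpLowering::Abort;
    return UniformMemOpLowering::Scalarize; // scalar op, then broadcast/extract
  }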
@@ -7146,13 +7145,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
InstWidening Decision = getWideningDecision(I, Width);
assert(Decision != CM_Unknown &&
"CM decision should be taken at this point");
- if (Decision == CM_Scalarize) {
- if (VF.isScalable() && isa<StoreInst>(I))
- // We can't scalarize a scalable vector store (even a uniform one
- // currently), return an invalid cost so as to prevent vectorization.
- return InstructionCost::getInvalid();
+ if (getWideningCost(I, VF) == InstructionCost::getInvalid())
+ return InstructionCost::getInvalid();
+ if (Decision == CM_Scalarize)
Width = ElementCount::getFixed(1);
- }
}
VectorTy = ToVectorTy(getLoadStoreType(I), Width);
return getMemoryInstructionCost(I, VF);
@@ -7308,14 +7304,6 @@ Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
} // end namespace llvm
-bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
- // Check if the pointer operand of a load or store instruction is
- // consecutive.
- if (auto *Ptr = getLoadStorePointerOperand(Inst))
- return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr);
- return false;
-}
-
void LoopVectorizationCostModel::collectValuesToIgnore() {
// Ignore ephemeral values.
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
@@ -8370,7 +8358,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
Range);
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) { return CM.isPredicatedInst(I, VF, IsUniform); },
+ [&](ElementCount VF) { return CM.isPredicatedInst(I, VF); },
Range);
// Even if the instruction is not marked as uniform, there are certain
@@ -8406,8 +8394,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
- setRecipe(I, Recipe);
- Plan->addVPValue(I, Recipe);
// Find if I uses a predicated instruction. If so, it will use its scalar
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -8426,6 +8412,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
// Finalize the recipe for Instr, first if it is not predicated.
if (!IsPredicated) {
LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n");
+ setRecipe(I, Recipe);
+ Plan->addVPValue(I, Recipe);
VPBB->appendRecipe(Recipe);
return VPBB;
}
@@ -8436,7 +8424,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
"predicated replication.");
VPBlockUtils::disconnectBlocks(VPBB, SingleSucc);
// Record predicated instructions for above packing optimizations.
- VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan);
+ VPBlockBase *Region = createReplicateRegion(Recipe, Plan);
VPBlockUtils::insertBlockAfter(Region, VPBB);
auto *RegSucc = new VPBasicBlock();
VPBlockUtils::insertBlockAfter(RegSucc, Region);
@@ -8444,11 +8432,12 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
return RegSucc;
}
-VPRegionBlock *VPRecipeBuilder::createReplicateRegion(
- Instruction *Instr, VPReplicateRecipe *PredRecipe, VPlanPtr &Plan) {
+VPRegionBlock *
+VPRecipeBuilder::createReplicateRegion(VPReplicateRecipe *PredRecipe,
+ VPlanPtr &Plan) {
+ Instruction *Instr = PredRecipe->getUnderlyingInstr();
// Instructions marked for predication are replicated and placed under an
// if-then construct to prevent side-effects.
-
// Generate recipes to compute the block mask for this region.
VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan);
@@ -8461,9 +8450,13 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(
? nullptr
: new VPPredInstPHIRecipe(PredRecipe);
if (PHIRecipe) {
- Plan->removeVPValueFor(Instr);
+ setRecipe(Instr, PHIRecipe);
Plan->addVPValue(Instr, PHIRecipe);
+ } else {
+ setRecipe(Instr, PredRecipe);
+ Plan->addVPValue(Instr, PredRecipe);
}
+
auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe);
VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
@@ -9564,12 +9557,19 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
return;
}
- // Generate scalar instances for all VF lanes of all UF parts, unless the
- // instruction is uniform inwhich case generate only the first lane for each
- // of the UF parts.
- unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue();
- assert((!State.VF.isScalable() || IsUniform) &&
- "Can't scalarize a scalable vector");
+ if (IsUniform) {
+ // Uniform within VL means we need to generate lane 0 only for each
+ // unrolled copy.
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ State.ILV->scalarizeInstruction(getUnderlyingInstr(), this,
+ VPIteration(Part, 0), IsPredicated,
+ State);
+ return;
+ }
+
+ // Generate scalar instances for all VF lanes of all UF parts.
+ assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
+ const unsigned EndLane = State.VF.getKnownMinValue();
for (unsigned Part = 0; Part < State.UF; ++Part)
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
State.ILV->scalarizeInstruction(getUnderlyingInstr(), this,
@@ -9577,52 +9577,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
State);
}
-void VPPredInstPHIRecipe::execute(VPTransformState &State) {
- assert(State.Instance && "Predicated instruction PHI works per instance.");
- Instruction *ScalarPredInst =
- cast<Instruction>(State.get(getOperand(0), *State.Instance));
- BasicBlock *PredicatedBB = ScalarPredInst->getParent();
- BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
- assert(PredicatingBB && "Predicated block has no single predecessor.");
- assert(isa<VPReplicateRecipe>(getOperand(0)) &&
- "operand must be VPReplicateRecipe");
-
- // By current pack/unpack logic we need to generate only a single phi node: if
- // a vector value for the predicated instruction exists at this point it means
- // the instruction has vector users only, and a phi for the vector value is
- // needed. In this case the recipe of the predicated instruction is marked to
- // also do that packing, thereby "hoisting" the insert-element sequence.
- // Otherwise, a phi node for the scalar value is needed.
- unsigned Part = State.Instance->Part;
- if (State.hasVectorValue(getOperand(0), Part)) {
- Value *VectorValue = State.get(getOperand(0), Part);
- InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
- PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
- VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
- VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
- if (State.hasVectorValue(this, Part))
- State.reset(this, VPhi, Part);
- else
- State.set(this, VPhi, Part);
- // NOTE: Currently we need to update the value of the operand, so the next
- // predicated iteration inserts its generated value in the correct vector.
- State.reset(getOperand(0), VPhi, Part);
- } else {
- Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
- PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
- Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
- PredicatingBB);
- Phi->addIncoming(ScalarPredInst, PredicatedBB);
- if (State.hasScalarValue(this, *State.Instance))
- State.reset(this, Phi, *State.Instance);
- else
- State.set(this, Phi, *State.Instance);
- // NOTE: Currently we need to update the value of the operand, so the next
- // predicated iteration inserts its generated value in the correct vector.
- State.reset(getOperand(0), Phi, *State.Instance);
- }
-}
-
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
@@ -9793,8 +9747,7 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
};
// 4) if the TTI hook indicates this is profitable, request predication.
- if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT,
- LVL.getLAI()))
+ if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL))
return CM_ScalarEpilogueNotNeededUsePredicate;
return CM_ScalarEpilogueAllowed;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e136cd9aedac..cd044c78d900 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3744,7 +3744,7 @@ void BoUpSLP::reorderTopToBottom() {
unsigned Opcode0 = TE->getOpcode();
unsigned Opcode1 = TE->getAltOpcode();
// The opcode mask selects between the two opcodes.
- SmallBitVector OpcodeMask(TE->Scalars.size(), 0);
+ SmallBitVector OpcodeMask(TE->Scalars.size(), false);
for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size()))
if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1)
OpcodeMask.set(Lane);
@@ -4814,6 +4814,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
+ // Don't go into catchswitch blocks, which can happen with PHIs.
+ // Such blocks can only have PHIs and the catchswitch. There is no
+ // place to insert a shuffle if we need to, so just avoid that issue.
+ if (isa<CatchSwitchInst>(BB->getTerminator())) {
+ LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ return;
+ }
+
// Check that every instruction appears once in this bundle.
if (!TryToFindDuplicates(S))
return;
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index c7949c42c03e..07d3fa56020b 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -157,10 +157,8 @@ public:
return Ingredient2Recipe[I];
}
- /// Create a replicating region for instruction \p I that requires
- /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I.
- VPRegionBlock *createReplicateRegion(Instruction *I,
- VPReplicateRecipe *PredRecipe,
+ /// Create a replicating region for \p PredRecipe.
+ VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
VPlanPtr &Plan);
/// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fdd901a4a70d..cb7507264667 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -892,6 +892,52 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
ReplaceInstWithInst(CurrentTerminator, CondBr);
}
+void VPPredInstPHIRecipe::execute(VPTransformState &State) {
+ assert(State.Instance && "Predicated instruction PHI works per instance.");
+ Instruction *ScalarPredInst =
+ cast<Instruction>(State.get(getOperand(0), *State.Instance));
+ BasicBlock *PredicatedBB = ScalarPredInst->getParent();
+ BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
+ assert(PredicatingBB && "Predicated block has no single predecessor.");
+ assert(isa<VPReplicateRecipe>(getOperand(0)) &&
+ "operand must be VPReplicateRecipe");
+
+ // By current pack/unpack logic we need to generate only a single phi node: if
+ // a vector value for the predicated instruction exists at this point it means
+ // the instruction has vector users only, and a phi for the vector value is
+ // needed. In this case the recipe of the predicated instruction is marked to
+ // also do that packing, thereby "hoisting" the insert-element sequence.
+ // Otherwise, a phi node for the scalar value is needed.
+ unsigned Part = State.Instance->Part;
+ if (State.hasVectorValue(getOperand(0), Part)) {
+ Value *VectorValue = State.get(getOperand(0), Part);
+ InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
+ PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
+ VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
+ VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
+ if (State.hasVectorValue(this, Part))
+ State.reset(this, VPhi, Part);
+ else
+ State.set(this, VPhi, Part);
+ // NOTE: Currently we need to update the value of the operand, so the next
+ // predicated iteration inserts its generated value in the correct vector.
+ State.reset(getOperand(0), VPhi, Part);
+ } else {
+ Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
+ PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
+ Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
+ PredicatingBB);
+ Phi->addIncoming(ScalarPredInst, PredicatedBB);
+ if (State.hasScalarValue(this, *State.Instance))
+ State.reset(this, Phi, *State.Instance);
+ else
+ State.set(this, Phi, *State.Instance);
+ // NOTE: Currently we need to update the value of the operand, so the next
+ // predicated iteration inserts its generated value in the correct vector.
+ State.reset(getOperand(0), Phi, *State.Instance);
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 3501de6ab38e..43e0a40fedb9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -133,7 +133,9 @@ void VPlanVerifier::verifyHierarchicalCFG(
verifyRegionRec(TopRegion);
}
-static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) {
+static bool
+verifyVPBasicBlock(const VPBasicBlock *VPBB,
+ DenseMap<const VPBlockBase *, unsigned> &BlockNumbering) {
// Verify that phi-like recipes are at the beginning of the block, with no
// other recipes in between.
auto RecipeI = VPBB->begin();
@@ -165,15 +167,71 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) {
RecipeI++;
}
+ // Verify that defs in VPBB dominate all their uses. The current
+ // implementation is still incomplete.
+ DenseMap<const VPRecipeBase *, unsigned> RecipeNumbering;
+ unsigned Cnt = 0;
+ for (const VPRecipeBase &R : *VPBB)
+ RecipeNumbering[&R] = Cnt++;
+
+ for (const VPRecipeBase &R : *VPBB) {
+ for (const VPValue *V : R.definedValues()) {
+ for (const VPUser *U : V->users()) {
+ auto *UI = dyn_cast<VPRecipeBase>(U);
+ if (!UI || isa<VPHeaderPHIRecipe>(UI))
+ continue;
+
+ // If the user is in the same block, check it comes after R in the
+ // block.
+ if (UI->getParent() == VPBB) {
+ if (RecipeNumbering[UI] < RecipeNumbering[&R]) {
+ errs() << "Use before def!\n";
+ return false;
+ }
+ continue;
+ }
+
+ // Skip blocks outside any region for now and blocks outside
+ // replicate-regions.
+ auto *ParentR = VPBB->getParent();
+ if (!ParentR || !ParentR->isReplicator())
+ continue;
+
+ // For replicators, verify that VPPredInstPHIRecipe defs are only used
+ // in subsequent blocks.
+ if (isa<VPPredInstPHIRecipe>(&R)) {
+ auto I = BlockNumbering.find(UI->getParent());
+ unsigned BlockNumber = I == BlockNumbering.end() ? std::numeric_limits<unsigned>::max() : I->second;
+ if (BlockNumber < BlockNumbering[ParentR]) {
+ errs() << "Use before def!\n";
+ return false;
+ }
+ continue;
+ }
+
+ // All non-VPPredInstPHIRecipe recipes in the block must be used in
+ // the replicate region only.
+ if (UI->getParent()->getParent() != ParentR) {
+ errs() << "Use before def!\n";
+ return false;
+ }
+ }
+ }
+ }
return true;
}
bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
+ DenseMap<const VPBlockBase *, unsigned> BlockNumbering;
+ unsigned Cnt = 0;
auto Iter = depth_first(
VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
- for (const VPBasicBlock *VPBB :
- VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
- if (!verifyVPBasicBlock(VPBB))
+ for (const VPBlockBase *VPB : Iter) {
+ BlockNumbering[VPB] = Cnt++;
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
+ if (!VPBB)
+ continue;
+ if (!verifyVPBasicBlock(VPBB, BlockNumbering))
return false;
}
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d12624ffb824..a38936644bd3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1302,7 +1302,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
for (ShuffleVectorInst *SV : Shuffles) {
for (auto U : SV->users()) {
ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
- if (SSV && isa<UndefValue>(SSV->getOperand(1)))
+ if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
Shuffles.push_back(SSV);
}
}
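The VectorCombine fix adds a type check so that a user shuffle is only pulled into the fold when it produces the same vector type as the shuffles already collected. A sketch of the guard in isolation, assuming LLVM headers; the helper name and the plain Type pointer parameter are illustrative:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // A user shuffle qualifies only if its second operand is undef/poison and
  // its result type matches the vector type being combined.
  static bool isFoldableUserShuffle(const User *U, const Type *VT) {
    auto *SSV = dyn_cast<ShuffleVectorInst>(U);
    return SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT;
  }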