Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64.h1
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td27
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp214
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp19
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.h5
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp190
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h14
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrAtomics.td60
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td256
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp9
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.h1
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td108
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64MCInstLower.h3
-rw-r--r--llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp325
-rw-r--r--llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td46
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA55.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA57.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA64FX.td10
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM3.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM4.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM5.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp15
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h19
-rw-r--r--llvm/lib/Target/AArch64/AArch64SystemOperands.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetObjectFile.h1
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp147
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h10
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp183
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp55
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp3
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp17
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h4
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td30
-rw-r--r--llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp4
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp62
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp29
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp205
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h10
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp171
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp204
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp61
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibFunc.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPTNote.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp30
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h14
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp71
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td1
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h1
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h7
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td106
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.h1
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.h1
-rw-r--r--llvm/lib/Target/AMDGPU/R600Subtarget.h6
-rw-r--r--llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h2
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.h5
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp418
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h5
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp149
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h11
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td8
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td130
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp137
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp55
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp53
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegister.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp46
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td8
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td15
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h17
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td36
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td41
-rw-r--r--llvm/lib/Target/ARM/ARM.h3
-rw-r--r--llvm/lib/Target/ARM/ARM.td51
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp30
-rw-r--r--llvm/lib/Target/ARM/ARMCallLowering.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMCallLowering.h1
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp14
-rw-r--r--llvm/lib/Target/ARM/ARMFastISel.cpp100
-rw-r--r--llvm/lib/Target/ARM/ARMFrameLowering.cpp18
-rw-r--r--llvm/lib/Target/ARM/ARMHazardRecognizer.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMHazardRecognizer.h2
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp56
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.cpp3
-rw-r--r--llvm/lib/Target/ARM/ARMInstrNEON.td96
-rw-r--r--llvm/lib/Target/ARM/ARMInstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterInfo.h2
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h6
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.cpp1
-rw-r--r--llvm/lib/Target/ARM/ARMTargetObjectFile.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMTargetObjectFile.h5
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp12
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h12
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp7
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h2
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp18
-rw-r--r--llvm/lib/Target/ARM/Thumb1InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.h1
-rw-r--r--llvm/lib/Target/ARM/Thumb2SizeReduction.cpp5
-rw-r--r--llvm/lib/Target/ARM/ThumbRegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/AVR/AVR.h48
-rw-r--r--llvm/lib/Target/AVR/AVRCallingConv.td2
-rw-r--r--llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp181
-rw-r--r--llvm/lib/Target/AVR/AVRFrameLowering.cpp96
-rw-r--r--llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp88
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.cpp43
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.h5
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.cpp4
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.td55
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.cpp5
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.h4
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.td20
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.cpp3
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.h3
-rw-r--r--llvm/lib/Target/AVR/AVRTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/AVR/AVRTargetObjectFile.cpp53
-rw-r--r--llvm/lib/Target/AVR/AVRTargetObjectFile.h5
-rw-r--r--llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp10
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp4
-rw-r--r--llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFMIChecking.cpp4
-rw-r--r--llvm/lib/Target/BPF/BPFMIPeephole.cpp16
-rw-r--r--llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFPreserveDIType.cpp6
-rw-r--r--llvm/lib/Target/BPF/BPFSubtarget.cpp2
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.cpp2
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp2
-rw-r--r--llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp8
-rw-r--r--llvm/lib/Target/CSKY/CSKY.h3
-rw-r--r--llvm/lib/Target/CSKY/CSKY.td34
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp152
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.h16
-rw-r--r--llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp1376
-rw-r--r--llvm/lib/Target/CSKY/CSKYConstantPoolValue.cpp216
-rw-r--r--llvm/lib/Target/CSKY/CSKYConstantPoolValue.h221
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.cpp548
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.h36
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp18
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.cpp784
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.h100
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td274
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td208
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.cpp309
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.h27
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.td216
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfoF1.td420
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfoF2.td462
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.td2
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetMachine.cpp8
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp15
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h4
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/BitTracker.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitTracker.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp19
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCallingConv.td12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp127
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp20
-rw-r--r--llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp11
-rw-r--r--llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenInsert.cpp93
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp20
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp27
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp19
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.cpp45
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.h4
-rwxr-xr-x[-rw-r--r--]llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp363
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp122
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.h7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp102
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatterns.td25
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatternsHVX.td255
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatternsV65.td45
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.td12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp11
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSubtarget.cpp78
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSubtarget.h4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp67
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp11
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp207
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h10
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp10
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp23
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp48
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h29
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp25
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h23
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp26
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp96
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h24
-rw-r--r--llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.cpp8
-rw-r--r--llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp5
-rw-r--r--llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/Lanai/LanaiSubtarget.cpp2
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp2
-rw-r--r--llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp2
-rw-r--r--llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.h2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h4
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td2
-rw-r--r--llvm/lib/Target/M68k/M68k.h4
-rw-r--r--llvm/lib/Target/M68k/M68k.td2
-rw-r--r--llvm/lib/Target/M68k/M68kAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kAsmPrinter.h4
-rw-r--r--llvm/lib/Target/M68k/M68kCallingConv.h4
-rw-r--r--llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kExpandPseudo.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kFrameLowering.cpp18
-rw-r--r--llvm/lib/Target/M68k/M68kFrameLowering.h4
-rw-r--r--llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.cpp78
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.h10
-rw-r--r--llvm/lib/Target/M68k/M68kInstrArithmetic.td49
-rw-r--r--llvm/lib/Target/M68k/M68kInstrBits.td12
-rw-r--r--llvm/lib/Target/M68k/M68kInstrBuilder.h4
-rw-r--r--llvm/lib/Target/M68k/M68kInstrCompiler.td2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrControl.td2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrData.td2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrFormats.td2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.cpp14
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.h4
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.td8
-rw-r--r--llvm/lib/Target/M68k/M68kInstrShiftRotate.td2
-rw-r--r--llvm/lib/Target/M68k/M68kMCInstLower.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kMCInstLower.h4
-rw-r--r--llvm/lib/Target/M68k/M68kMachineFunction.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kMachineFunction.h4
-rw-r--r--llvm/lib/Target/M68k/M68kRegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kRegisterInfo.h4
-rw-r--r--llvm/lib/Target/M68k/M68kRegisterInfo.td2
-rw-r--r--llvm/lib/Target/M68k/M68kSchedule.td2
-rw-r--r--llvm/lib/Target/M68k/M68kSubtarget.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kSubtarget.h8
-rw-r--r--llvm/lib/Target/M68k/M68kTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kTargetMachine.h4
-rw-r--r--llvm/lib/Target/M68k/M68kTargetObjectFile.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kTargetObjectFile.h4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp11
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp1
-rw-r--r--llvm/lib/Target/MSP430/MSP430FrameLowering.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelLowering.cpp8
-rw-r--r--llvm/lib/Target/MSP430/MSP430Subtarget.cpp2
-rw-r--r--llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips.h2
-rw-r--r--llvm/lib/Target/Mips/Mips16FrameLowering.cpp4
-rw-r--r--llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips16ISelLowering.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips16InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/Mips16RegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsAsmPrinter.cpp20
-rw-r--r--llvm/lib/Target/Mips/MipsCallLowering.h1
-rw-r--r--llvm/lib/Target/Mips/MipsConstantIslandPass.cpp61
-rw-r--r--llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp8
-rw-r--r--llvm/lib/Target/Mips/MipsFastISel.cpp124
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.cpp12
-rw-r--r--llvm/lib/Target/Mips/MipsInstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsMachineFunction.cpp12
-rw-r--r--llvm/lib/Target/Mips/MipsMulMulBugPass.cpp136
-rw-r--r--llvm/lib/Target/Mips/MipsRegisterInfo.cpp8
-rw-r--r--llvm/lib/Target/Mips/MipsSEFrameLowering.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.cpp23
-rw-r--r--llvm/lib/Target/Mips/MipsSERegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsTargetMachine.cpp10
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.h4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp10
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp20
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp32
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp13
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td56
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td32
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp9
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXUtilities.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp15
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp10
-rw-r--r--llvm/lib/Target/PowerPC/P10InstrResources.td10
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td2
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp43
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp101
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp18
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp47
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td25
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp20
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td24
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrPrefix.td2
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp43
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp14
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h6
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp21
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp12
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp56
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h3
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp30
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h5
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h4
-rw-r--r--llvm/lib/Target/RISCV/RISCV.h3
-rw-r--r--llvm/lib/Target/RISCV/RISCV.td283
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp24
-rw-r--r--llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp42
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp534
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h4
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp891
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h60
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp86
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td7
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp132
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td41
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoD.td80
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoF.td142
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td180
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td611
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td110
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td196
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZb.td254
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td82
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZk.td203
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp3
-rw-r--r--llvm/lib/Target/RISCV/RISCVMCInstLower.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp5
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td35
-rw-r--r--llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp278
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSiFive7.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVScheduleB.td50
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.cpp68
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h99
-rw-r--r--llvm/lib/Target/RISCV/RISCVSystemOperands.td391
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp9
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp43
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h31
-rw-r--r--llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp9
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.cpp6
-rw-r--r--llvm/lib/Target/Sparc/SparcTargetObjectFile.h4
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZ.h10
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp44
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.h2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp10
-rw-r--r--llvm/lib/Target/SystemZ/SystemZElimCompare.cpp12
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp33
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.h1
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp14
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h1
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp12
-rw-r--r--llvm/lib/Target/SystemZ/SystemZLongBranch.cpp12
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMCInstLower.h1
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h9
-rw-r--r--llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp18
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZShortenInst.cpp17
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.h2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTDC.cpp4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp8
-rw-r--r--llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp2
-rw-r--r--llvm/lib/Target/VE/LVLGen.cpp4
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/VE/VE.h1
-rw-r--r--llvm/lib/Target/VE/VECustomDAG.cpp81
-rw-r--r--llvm/lib/Target/VE/VECustomDAG.h79
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.cpp86
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.h4
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.td16
-rw-r--r--llvm/lib/Target/VE/VEInstrPatternsVec.td16
-rw-r--r--llvm/lib/Target/VE/VEMCInstLower.cpp3
-rw-r--r--llvm/lib/Target/VE/VEMachineFunctionInfo.h5
-rw-r--r--llvm/lib/Target/VE/VESubtarget.h2
-rw-r--r--llvm/lib/Target/VE/VVPInstrInfo.td11
-rw-r--r--llvm/lib/Target/VE/VVPInstrPatternsVec.td85
-rw-r--r--llvm/lib/Target/VE/VVPNodes.def5
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h1
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp64
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp222
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp3
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp6
-rw-r--r--llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp64
-rw-r--r--llvm/lib/Target/X86/MCA/X86CustomBehaviour.h47
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h2
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp13
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.h1
-rw-r--r--llvm/lib/Target/X86/X86CallLowering.h2
-rw-r--r--llvm/lib/Target/X86/X86FastTileConfig.cpp2
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp103
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.h5
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp6
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp759
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h2
-rw-r--r--llvm/lib/Target/X86/X86IndirectBranchTracking.cpp48
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td7
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp92
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h3
-rw-r--r--llvm/lib/Target/X86/X86InstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/X86/X86OptimizeLEAs.cpp2
-rw-r--r--llvm/lib/Target/X86/X86PadShortFunction.cpp7
-rw-r--r--llvm/lib/Target/X86/X86PartialReduction.cpp65
-rw-r--r--llvm/lib/Target/X86/X86SchedBroadwell.td38
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td38
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td46
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td33
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td38
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td46
-rw-r--r--llvm/lib/Target/X86/X86Schedule.td10
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td2
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBdVer2.td2
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td2
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td20
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td81
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver2.td81
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver3.td2
-rw-r--r--llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp8
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp2
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp114
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h4
-rw-r--r--llvm/lib/Target/XCore/XCoreFrameLowering.cpp4
-rw-r--r--llvm/lib/Target/XCore/XCoreRegisterInfo.cpp4
-rw-r--r--llvm/lib/Target/XCore/XCoreSubtarget.cpp4
529 files changed, 17397 insertions, 4908 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index b0dd30c13137..4d1464901777 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -26,7 +26,6 @@ class AArch64Subtarget;
class AArch64TargetMachine;
class FunctionPass;
class InstructionSelector;
-class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index cb17fd94c335..b87468d5c8de 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -416,6 +416,12 @@ def FeatureHCX : SubtargetFeature<
def FeatureLS64 : SubtargetFeature<"ls64", "HasLS64",
"true", "Enable Armv8.7-A LD64B/ST64B Accelerator Extension">;
+def FeatureHBC : SubtargetFeature<"hbc", "HasHBC",
+ "true", "Enable Armv8.8-A Hinted Conditional Branches Extension">;
+
+def FeatureMOPS : SubtargetFeature<"mops", "HasMOPS",
+ "true", "Enable Armv8.8-A memcpy and memset acceleration instructions">;
+
def FeatureBRBE : SubtargetFeature<"brbe", "HasBRBE",
"true", "Enable Branch Record Buffer Extension">;
@@ -497,6 +503,10 @@ def HasV8_7aOps : SubtargetFeature<
"v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions",
[HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>;
+def HasV8_8aOps : SubtargetFeature<
+ "v8.8a", "HasV8_8aOps", "true", "Support ARM v8.8a instructions",
+ [HasV8_7aOps, FeatureHBC, FeatureMOPS]>;
+
def HasV9_0aOps : SubtargetFeature<
"v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions",
[HasV8_5aOps, FeatureSVE2]>;
@@ -509,21 +519,22 @@ def HasV9_2aOps : SubtargetFeature<
"v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions",
[HasV8_7aOps, HasV9_1aOps]>;
+def HasV9_3aOps : SubtargetFeature<
+ "v9.3a", "HasV9_3aOps", "true", "Support ARM v9.3a instructions",
+ [HasV8_8aOps, HasV9_2aOps]>;
+
def HasV8_0rOps : SubtargetFeature<
"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
[//v8.1
FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2,
//v8.2
- FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4,
- FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV,
+ FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV,
//v8.3
FeatureComplxNum, FeatureCCIDX, FeatureJS,
FeaturePAuth, FeatureRCPC,
//v8.4
- FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4,
- FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
- //v8.5
- FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>;
+ FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI,
+ FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO]>;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -955,7 +966,9 @@ def ProcessorFeatures {
list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
FeatureETE, FeatureMTE, FeatureFP16FML,
FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8];
- list<SubtargetFeature> R82 = [HasV8_0rOps];
+ list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
+ FeatureFP16FML, FeatureSSBS, FeaturePredRes,
+ FeatureSB, FeatureSpecRestrict];
list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureRCPC, FeaturePerfMon,
FeatureSPE, FeatureFullFP16, FeatureDotProd];
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index c90601443934..f26151536a58 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -468,7 +468,7 @@ def CSR_Darwin_AArch64_TLS
// CSR_Darwin_AArch64_CXX_TLS should be a subset of CSR_Darwin_AArch64_TLS.
def CSR_Darwin_AArch64_CXX_TLS
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
- (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
+ (sub (sequence "X%u", 1, 28), X9, X15, X16, X17, X18, X19),
(sequence "D%u", 0, 31))>;
// CSRs that are handled by prologue, epilogue.
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index ee6e670fe3cd..109b739528bf 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -443,7 +443,7 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
if (DType == AArch64::DestructiveBinary)
@@ -989,7 +989,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Kill)
.addReg(DstReg, DstFlags | RegState::Implicit);
} else {
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
.add(MI.getOperand(0))
.addUse(DstReg, RegState::Kill);
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 3dc694df509d..c67fa62c7a92 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -355,7 +355,7 @@ unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+ Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(SI->second)
@@ -378,7 +378,7 @@ unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
: &AArch64::GPR32RegClass;
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
ResultReg).addReg(ZeroReg, getKillRegState(true));
return ResultReg;
@@ -410,11 +410,11 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
const TargetRegisterClass *RC = Is64Bit ?
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- unsigned TmpReg = createResultReg(RC);
+ Register TmpReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
.addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(TmpReg, getKillRegState(true));
@@ -427,12 +427,12 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
Align Alignment = DL.getPrefTypeAlign(CFP->getType());
unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
- unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(ADRPReg)
.addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
@@ -455,7 +455,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
if (!DestEVT.isSimple())
return 0;
- unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
unsigned ResultReg;
if (OpFlags & AArch64II::MO_GOT) {
@@ -482,7 +482,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
// LDRWui produces a 32-bit register, but pointers in-register are 64-bits
// so we must extend the result on ILP32.
- unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
+ Register Result64 = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG))
.addDef(Result64)
@@ -751,7 +751,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 0xffffffff) {
Addr.setExtendType(AArch64_AM::UXTW);
- unsigned Reg = getRegForValue(LHS);
+ Register Reg = getRegForValue(LHS);
if (!Reg)
return false;
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
@@ -760,7 +760,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
}
}
- unsigned Reg = getRegForValue(Src);
+ Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
@@ -821,7 +821,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
}
}
- unsigned Reg = getRegForValue(Src);
+ Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
@@ -847,7 +847,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
Addr.setExtendType(AArch64_AM::LSL);
Addr.setExtendType(AArch64_AM::UXTW);
- unsigned Reg = getRegForValue(LHS);
+ Register Reg = getRegForValue(LHS);
if (!Reg)
return false;
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
@@ -879,7 +879,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
break;
Addr.setShift(0);
- unsigned Reg = getRegForValue(Src);
+ Register Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
@@ -888,7 +888,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
} // end switch
if (Addr.isRegBase() && !Addr.getReg()) {
- unsigned Reg = getRegForValue(Obj);
+ Register Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setReg(Reg);
@@ -896,7 +896,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
}
if (!Addr.getOffsetReg()) {
- unsigned Reg = getRegForValue(Obj);
+ Register Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
@@ -1034,7 +1034,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
// continue. This should almost never happen.
if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
{
- unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+ Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(Addr.getFI())
@@ -1178,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
SI->getOpcode() == Instruction::AShr )
std::swap(LHS, RHS);
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
@@ -1207,13 +1207,13 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
- unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ Register RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
C->getZExtValue(), SetFlags, WantResult);
}
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
@@ -1232,7 +1232,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
- unsigned RHSReg = getRegForValue(MulLHS);
+ Register RHSReg = getRegForValue(MulLHS);
if (!RHSReg)
return 0;
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
@@ -1255,7 +1255,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
}
uint64_t ShiftVal = C->getZExtValue();
if (ShiftType != AArch64_AM::InvalidShiftExtend) {
- unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ Register RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
@@ -1267,7 +1267,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
}
}
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
@@ -1489,7 +1489,7 @@ bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
if (CFP->isZero() && !CFP->isNegative())
UseImm = true;
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
@@ -1500,7 +1500,7 @@ bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
return true;
}
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
@@ -1577,7 +1577,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
if (isa<ConstantInt>(SI->getOperand(1)))
std::swap(LHS, RHS);
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
@@ -1602,7 +1602,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
- unsigned RHSReg = getRegForValue(MulLHS);
+ Register RHSReg = getRegForValue(MulLHS);
if (!RHSReg)
return 0;
ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
@@ -1616,7 +1616,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
if (const auto *SI = dyn_cast<ShlOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
uint64_t ShiftVal = C->getZExtValue();
- unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ Register RHSReg = getRegForValue(SI->getOperand(0));
if (!RHSReg)
return 0;
ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
@@ -1625,7 +1625,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
}
}
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return 0;
@@ -1673,7 +1673,7 @@ unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
return 0;
- unsigned ResultReg =
+ Register ResultReg =
fastEmitInst_ri(Opc, RC, LHSReg,
AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
@@ -1715,7 +1715,7 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
RC = &AArch64::GPR64RegClass;
break;
}
- unsigned ResultReg =
+ Register ResultReg =
fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
@@ -1841,7 +1841,7 @@ unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
}
// Create the base instruction, then add the operands.
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
@@ -1856,7 +1856,7 @@ unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
// For zero-extending loads to 64bit we emit a 32bit load and then convert
// the 32bit reg to a 64bit reg.
if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
- unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), Reg64)
.addImm(0)
@@ -1991,7 +1991,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// The integer extend hasn't been emitted yet. FastISel or SelectionDAG
// could select it. Emit a copy to subreg if necessary. FastISel will remove
// it when it selects the integer extend.
- unsigned Reg = lookUpRegForValue(IntExtVal);
+ Register Reg = lookUpRegForValue(IntExtVal);
auto *MI = MRI.getUniqueVRegDef(Reg);
if (!MI) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
@@ -2174,7 +2174,7 @@ bool AArch64FastISel::selectStore(const Instruction *I) {
// The non-atomic instructions are sufficient for relaxed stores.
if (isReleaseOrStronger(Ord)) {
// The STLR addressing mode only supports a base reg; pass that directly.
- unsigned AddrReg = getRegForValue(PtrV);
+ Register AddrReg = getRegForValue(PtrV);
return emitStoreRelease(VT, SrcReg, AddrReg,
createMachineMemOperandFor(I));
}
@@ -2339,7 +2339,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
const MCInstrDesc &II = TII.get(Opc);
- unsigned SrcReg = getRegForValue(LHS);
+ Register SrcReg = getRegForValue(LHS);
if (!SrcReg)
return false;
@@ -2454,7 +2454,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
// Fake request the condition, otherwise the intrinsic might be completely
// optimized away.
- unsigned CondReg = getRegForValue(BI->getCondition());
+ Register CondReg = getRegForValue(BI->getCondition());
if (!CondReg)
return false;
@@ -2468,7 +2468,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
}
}
- unsigned CondReg = getRegForValue(BI->getCondition());
+ Register CondReg = getRegForValue(BI->getCondition());
if (CondReg == 0)
return false;
@@ -2480,7 +2480,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
}
const MCInstrDesc &II = TII.get(Opcode);
- unsigned ConstrainedCondReg
+ Register ConstrainedCondReg
= constrainOperandRegClass(II, CondReg, II.getNumDefs());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addReg(ConstrainedCondReg)
@@ -2493,7 +2493,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
- unsigned AddrReg = getRegForValue(BI->getOperand(0));
+ Register AddrReg = getRegForValue(BI->getOperand(0));
if (AddrReg == 0)
return false;
@@ -2563,7 +2563,7 @@ bool AArch64FastISel::selectCmp(const Instruction *I) {
}
if (CondCodes) {
- unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
+ Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
TmpReg1)
.addReg(AArch64::WZR, getKillRegState(true))
@@ -2630,18 +2630,18 @@ bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
if (!Opc)
return false;
- unsigned Src1Reg = getRegForValue(Src1Val);
+ Register Src1Reg = getRegForValue(Src1Val);
if (!Src1Reg)
return false;
- unsigned Src2Reg = getRegForValue(Src2Val);
+ Register Src2Reg = getRegForValue(Src2Val);
if (!Src2Reg)
return false;
if (NeedExtraOp)
Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
- unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
+ Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
Src2Reg);
updateValueMap(SI, ResultReg);
return true;
@@ -2690,7 +2690,7 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
// Try to pickup the flags, so we don't have to emit another compare.
if (foldXALUIntrinsic(CC, I, Cond)) {
// Fake request the condition to force emission of the XALU intrinsic.
- unsigned CondReg = getRegForValue(Cond);
+ Register CondReg = getRegForValue(Cond);
if (!CondReg)
return false;
} else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
@@ -2711,7 +2711,7 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
}
if (FoldSelect) {
- unsigned SrcReg = getRegForValue(FoldSelect);
+ Register SrcReg = getRegForValue(FoldSelect);
if (!SrcReg)
return false;
@@ -2739,7 +2739,7 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
}
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
} else {
- unsigned CondReg = getRegForValue(Cond);
+ Register CondReg = getRegForValue(Cond);
if (!CondReg)
return false;
@@ -2753,8 +2753,8 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
}
- unsigned Src1Reg = getRegForValue(SI->getTrueValue());
- unsigned Src2Reg = getRegForValue(SI->getFalseValue());
+ Register Src1Reg = getRegForValue(SI->getTrueValue());
+ Register Src2Reg = getRegForValue(SI->getFalseValue());
if (!Src1Reg || !Src2Reg)
return false;
@@ -2762,7 +2762,7 @@ bool AArch64FastISel::selectSelect(const Instruction *I) {
if (ExtraCC != AArch64CC::AL)
Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
- unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
+ Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
updateValueMap(I, ResultReg);
return true;
}
@@ -2772,11 +2772,11 @@ bool AArch64FastISel::selectFPExt(const Instruction *I) {
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
return false;
- unsigned Op = getRegForValue(V);
+ Register Op = getRegForValue(V);
if (Op == 0)
return false;
- unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
+ Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
ResultReg).addReg(Op);
updateValueMap(I, ResultReg);
@@ -2788,11 +2788,11 @@ bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
return false;
- unsigned Op = getRegForValue(V);
+ Register Op = getRegForValue(V);
if (Op == 0)
return false;
- unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
+ Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
ResultReg).addReg(Op);
updateValueMap(I, ResultReg);
@@ -2805,7 +2805,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
- unsigned SrcReg = getRegForValue(I->getOperand(0));
+ Register SrcReg = getRegForValue(I->getOperand(0));
if (SrcReg == 0)
return false;
@@ -2825,7 +2825,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
else
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
}
- unsigned ResultReg = createResultReg(
+ Register ResultReg = createResultReg(
DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
@@ -2844,7 +2844,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
"Unexpected value type.");
- unsigned SrcReg = getRegForValue(I->getOperand(0));
+ Register SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
return false;
@@ -2871,7 +2871,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
}
- unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
+ Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
updateValueMap(I, ResultReg);
return true;
}
@@ -2975,11 +2975,11 @@ bool AArch64FastISel::fastLowerArguments() {
} else
llvm_unreachable("Unexpected value type.");
- unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
// Without this, EmitLiveInCopies may eliminate the livein if its only
// use is a bitcast (which isn't turned into an instruction).
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(DstReg, getKillRegState(true));
@@ -3009,7 +3009,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
const Value *ArgVal = CLI.OutVals[VA.getValNo()];
MVT ArgVT = OutVTs[VA.getValNo()];
- unsigned ArgReg = getRegForValue(ArgVal);
+ Register ArgReg = getRegForValue(ArgVal);
if (!ArgReg)
return false;
@@ -3104,7 +3104,7 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
if (CopyVT.isVector() && !Subtarget->isLittleEndian())
return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(RVLocs[0].getLocReg());
@@ -3209,14 +3209,14 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (Addr.getGlobalValue())
MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
else if (Addr.getReg()) {
- unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
+ Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
MIB.addReg(Reg);
} else
return false;
} else {
unsigned CallReg = 0;
if (Symbol) {
- unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
.addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
@@ -3438,7 +3438,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// SP = FP + Fixed Object + 16
int FI = MFI.CreateFixedObject(4, 0, false);
- unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+ Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::ADDXri), ResultReg)
.addFrameIndex(FI)
@@ -3568,10 +3568,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
Opc = AArch64::FABSDr;
break;
}
- unsigned SrcReg = getRegForValue(II->getOperand(0));
+ Register SrcReg = getRegForValue(II->getOperand(0));
if (!SrcReg)
return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
updateValueMap(II, ResultReg);
@@ -3593,7 +3593,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!isTypeLegal(RetTy, VT))
return false;
- unsigned Op0Reg = getRegForValue(II->getOperand(0));
+ Register Op0Reg = getRegForValue(II->getOperand(0));
if (!Op0Reg)
return false;
@@ -3671,17 +3671,17 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
break;
case Intrinsic::smul_with_overflow: {
CC = AArch64CC::NE;
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
if (VT == MVT::i32) {
MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
- unsigned MulSubReg =
+ Register MulSubReg =
fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
// cmp xreg, wreg, sxtw
emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
@@ -3701,11 +3701,11 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
case Intrinsic::umul_with_overflow: {
CC = AArch64CC::NE;
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
- unsigned RHSReg = getRegForValue(RHS);
+ Register RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
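
Both multiply-with-overflow paths above reduce to a widening multiply: the signed case compares the 64-bit product against the sign-extension of its low half (the "cmp xreg, wreg, sxtw" noted in the comment), while the unsigned case checks that the high half of the product is zero. A minimal standalone C++ sketch of the two checks (illustration only, not the fast-isel code itself):

#include <cstdint>

// Signed: overflow iff the full product differs from the sign-extension of its
// low 32 bits; the emitted compare sets NE exactly in that case.
bool smul32_overflows(int32_t a, int32_t b) {
  int64_t p = int64_t(a) * int64_t(b);     // SMULL: full 64-bit product
  return p != int64_t(int32_t(p));
}

// Unsigned: overflow iff the high 32 bits of the full product are non-zero.
bool umul32_overflows(uint32_t a, uint32_t b) {
  uint64_t p = uint64_t(a) * uint64_t(b);  // UMULL: full 64-bit product
  return (p >> 32) != 0;
}
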
@@ -3799,7 +3799,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (!VA.isRegLoc())
return false;
- unsigned Reg = getRegForValue(RV);
+ Register Reg = getRegForValue(RV);
if (Reg == 0)
return false;
@@ -3879,7 +3879,7 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) {
DestVT != MVT::i1)
return false;
- unsigned SrcReg = getRegForValue(Op);
+ Register SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
@@ -3906,7 +3906,7 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) {
break;
}
// Issue an extract_subreg to get the lower 32-bits.
- unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
+ Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
AArch64::sub_32);
// Create the AND instruction which performs the actual truncation.
ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
@@ -4007,7 +4007,7 @@ unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
if (NeedTrunc)
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
- unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
+ Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
if (NeedTrunc)
ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
return ResultReg;
@@ -4033,7 +4033,7 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
// Just emit a copy for "zero" shifts.
if (Shift == 0) {
if (RetVT == SrcVT) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(Op0);
@@ -4110,7 +4110,7 @@ unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
}
- unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
+ Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
if (NeedTrunc)
ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
return ResultReg;
@@ -4136,7 +4136,7 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
// Just emit a copy for "zero" shifts.
if (Shift == 0) {
if (RetVT == SrcVT) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(Op0);
@@ -4226,7 +4226,7 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
}
- unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
+ Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
if (NeedTrunc)
ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
return ResultReg;
@@ -4252,7 +4252,7 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
// Just emit a copy for "zero" shifts.
if (Shift == 0) {
if (RetVT == SrcVT) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(Op0);
@@ -4428,7 +4428,7 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
return false;
// Check if the load instruction has already been selected.
- unsigned Reg = lookUpRegForValue(LI);
+ Register Reg = lookUpRegForValue(LI);
if (!Reg)
return false;
@@ -4456,7 +4456,7 @@ bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
}
if (IsZExt) {
- unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), Reg64)
.addImm(0)
@@ -4490,7 +4490,7 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
if (optimizeIntExtLoad(I, RetVT, SrcVT))
return true;
- unsigned SrcReg = getRegForValue(I->getOperand(0));
+ Register SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
return false;
@@ -4499,7 +4499,7 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
- unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), ResultReg)
.addImm(0)
@@ -4543,21 +4543,21 @@ bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
break;
}
unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
- unsigned Src0Reg = getRegForValue(I->getOperand(0));
+ Register Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
- unsigned Src1Reg = getRegForValue(I->getOperand(1));
+ Register Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
+ Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
assert(QuotReg && "Unexpected DIV instruction emission failure.");
// The remainder is computed as numerator - (quotient * denominator) using the
// MSUB instruction.
- unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
+ Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
updateValueMap(I, ResultReg);
return true;
}
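
As the comment above says, the remainder is formed as numerator - (quotient * denominator); MSUB computes Ra - Rn * Rm in a single instruction, so SDIV followed by MSUB is the whole expansion. A minimal C++ equivalent of that two-instruction sequence (illustration only):

int srem_via_msub(int num, int den) {
  int quot = num / den;      // SDIVWr
  return num - quot * den;   // MSUBWrrr: Rd = Ra - Rn * Rm
}
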
@@ -4602,7 +4602,7 @@ bool AArch64FastISel::selectMul(const Instruction *I) {
}
}
- unsigned Src0Reg = getRegForValue(Src0);
+ Register Src0Reg = getRegForValue(Src0);
if (!Src0Reg)
return false;
@@ -4615,11 +4615,11 @@ bool AArch64FastISel::selectMul(const Instruction *I) {
}
}
- unsigned Src0Reg = getRegForValue(I->getOperand(0));
+ Register Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
- unsigned Src1Reg = getRegForValue(I->getOperand(1));
+ Register Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
@@ -4666,7 +4666,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
}
}
- unsigned Op0Reg = getRegForValue(Op0);
+ Register Op0Reg = getRegForValue(Op0);
if (!Op0Reg)
return false;
@@ -4689,11 +4689,11 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
return true;
}
- unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ Register Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ Register Op1Reg = getRegForValue(I->getOperand(1));
if (!Op1Reg)
return false;
@@ -4746,11 +4746,11 @@ bool AArch64FastISel::selectBitCast(const Instruction *I) {
case MVT::f32: RC = &AArch64::FPR32RegClass; break;
case MVT::f64: RC = &AArch64::FPR64RegClass; break;
}
- unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ Register Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
- unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
+ Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
if (!ResultReg)
return false;
@@ -4810,7 +4810,7 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
return selectBinaryOp(I, ISD::SDIV);
unsigned Lg2 = C.countTrailingZeros();
- unsigned Src0Reg = getRegForValue(I->getOperand(0));
+ Register Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
@@ -4840,7 +4840,7 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
SelectOpc = AArch64::CSELWr;
RC = &AArch64::GPR32RegClass;
}
- unsigned SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
+ Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
AArch64CC::LT);
if (!SelectReg)
return false;
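
For context on the ADD and the CSEL on AArch64CC::LT above: signed division by a power of two 1 << Lg2 is expanded by biasing negative numerators before the arithmetic shift so the result rounds toward zero. A hedged sketch of that standard expansion, assuming a positive power-of-two divisor (not the exact fast-isel code):

#include <cstdint>

int32_t sdiv_pow2(int32_t x, unsigned Lg2) {
  int32_t biased = x + ((1 << Lg2) - 1);   // ADD with 2^Lg2 - 1
  int32_t src = (x < 0) ? biased : x;      // CMP against 0, then CSEL on LT
  return src >> Lg2;                       // arithmetic shift right by Lg2
}
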
@@ -4866,7 +4866,7 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
- unsigned IdxN = getRegForValue(Idx);
+ Register IdxN = getRegForValue(Idx);
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
return 0;
@@ -4889,7 +4889,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
if (Subtarget->isTargetILP32())
return false;
- unsigned N = getRegForValue(I->getOperand(0));
+ Register N = getRegForValue(I->getOperand(0));
if (!N)
return false;
@@ -4983,16 +4983,16 @@ bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
const MCInstrDesc &II = TII.get(Opc);
- const unsigned AddrReg = constrainOperandRegClass(
+ const Register AddrReg = constrainOperandRegClass(
II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
- const unsigned DesiredReg = constrainOperandRegClass(
+ const Register DesiredReg = constrainOperandRegClass(
II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
- const unsigned NewReg = constrainOperandRegClass(
+ const Register NewReg = constrainOperandRegClass(
II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
- const unsigned ResultReg1 = createResultReg(ResRC);
- const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
- const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
+ const Register ResultReg1 = createResultReg(ResRC);
+ const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
+ const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
// FIXME: MachineMemOperand doesn't support cmpxchg yet.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 638e45b30d99..a4d20735e2b1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -547,7 +547,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
return;
for (const auto &Info : CSI) {
- unsigned Reg = Info.getReg();
+ Register Reg = Info.getReg();
// Not all unwinders may know about SVE registers, so assume the lowest
   // common denominator.
@@ -1653,8 +1653,7 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
// The AUTIASP instruction assembles to a hint instruction before v8.3a so
   // this instruction can safely be used for any v8a architecture.
// From v8.3a onwards there are optimised authenticate LR and return
- // instructions, namely RETA{A,B}, that can be used instead. In this case the
- // DW_CFA_AARCH64_negate_ra_state can't be emitted.
+ // instructions, namely RETA{A,B}, that can be used instead.
if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
MBBI->getOpcode() == AArch64::RET_ReallyLR) {
BuildMI(MBB, MBBI, DL,
@@ -1666,12 +1665,6 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
MBB, MBBI, DL,
TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
-
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameDestroy);
}
}
@@ -2292,7 +2285,7 @@ static void computeCalleeSaveRegisterPairs(
// MachO's compact unwind format relies on all registers being stored in
// pairs.
assert((!produceCompactUnwindFrame(MF) ||
- CC == CallingConv::PreserveMost ||
+ CC == CallingConv::PreserveMost || CC == CallingConv::CXX_FAST_TLS ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
@@ -2331,7 +2324,7 @@ static void computeCalleeSaveRegisterPairs(
// Add the next reg to the pair if it is in the same register class.
if (unsigned(i + RegInc) < Count) {
- unsigned NextReg = CSI[i + RegInc].getReg();
+ Register NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
case RegPairInfo::GPR:
@@ -2387,7 +2380,7 @@ static void computeCalleeSaveRegisterPairs(
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
assert((!produceCompactUnwindFrame(MF) ||
- CC == CallingConv::PreserveMost ||
+ CC == CallingConv::PreserveMost || CC == CallingConv::CXX_FAST_TLS ||
(RPI.isPaired() &&
((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
RPI.Reg1 + 1 == RPI.Reg2))) &&
@@ -3135,7 +3128,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
DebugLoc DL;
RS->enterBasicBlockEnd(MBB);
RS->backward(std::prev(MBBI));
- unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
+ Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
assert(DstReg && "There must be a free register after frame setup");
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index e6d997f91b47..31f57cbc49f2 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -26,9 +26,8 @@ public:
: TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
true /*StackRealignable*/) {}
- void
- emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const override;
+ void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index fe9b2f8883b9..899f069abdd4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5147,5 +5147,5 @@ bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
const AArch64TargetLowering *TLI =
static_cast<const AArch64TargetLowering *>(getTargetLowering());
- return TLI->isAllActivePredicate(N);
+ return TLI->isAllActivePredicate(*CurDAG, N);
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e141179fb5c8..a26bbc77f248 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -962,6 +962,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
+ setMaxBytesForAlignment(STI.getMaxBytesForLoopAlignment());
setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if specified by
@@ -1205,6 +1206,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
@@ -1245,6 +1248,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
@@ -1831,6 +1835,28 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known = KnownBits::commonBits(Known, Known2);
break;
}
+ case AArch64ISD::BICi: {
+ // Compute the bit cleared value.
+ uint64_t Mask =
+ ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
+ break;
+ }
+ case AArch64ISD::VLSHR: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+ Known = KnownBits::lshr(Known, Known2);
+ break;
+ }
+ case AArch64ISD::VASHR: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+ Known = KnownBits::ashr(Known, Known2);
+ break;
+ }
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
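
A scalar model of the new BICi case above (a hypothetical helper, not the KnownBits API): BIC-by-immediate clears the bits selected by imm << shift in every lane, so the result's known bits are the operand's known bits ANDed with the constant mask ~(imm << shift).

#include <cstdint>

struct KnownModel { uint64_t Zero = 0, One = 0; };  // per-bit known-0 / known-1 sets

KnownModel knownBitsForBICi(KnownModel Op, uint64_t Imm, uint64_t Shift) {
  const uint64_t Mask = ~(Imm << Shift);
  KnownModel Out;
  Out.One = Op.One & Mask;     // a bit is known 1 only if it was known 1 and kept
  Out.Zero = Op.Zero | ~Mask;  // every cleared bit becomes known 0
  return Out;
}
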
@@ -1971,6 +1997,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+ MAKE_CASE(AArch64ISD::ABDS_PRED)
+ MAKE_CASE(AArch64ISD::ABDU_PRED)
MAKE_CASE(AArch64ISD::ADD_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
@@ -2173,6 +2201,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
+ MAKE_CASE(AArch64ISD::PFALSE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
@@ -5173,6 +5202,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFixedLengthVectorSelectToSVE(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
+ case ISD::ABDS:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+ case ISD::ABDU:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
@@ -5380,7 +5413,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32-bit value, it is really passed promoted
@@ -5542,7 +5575,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Conservatively forward X8, since it might be used for aggregate return.
if (!CCInfo.isAllocated(AArch64::X8)) {
- unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+ Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
}
@@ -5626,7 +5659,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
+ Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), DL, Val, FIN,
@@ -5656,7 +5689,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
+ Register VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
@@ -7256,6 +7289,9 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
return getSVESafeBitCast(VT, IntResult, DAG);
}
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
@@ -7795,10 +7831,37 @@ SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
+ int64_t IdxVal = Idx.getSExtValue();
+ assert(Ty.isScalableVector() &&
+ "Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
+
+ // We can use the splice instruction for certain index values where we are
+ // able to efficiently generate the correct predicate. The index will be
+ // inverted and used directly as the input to the ptrue instruction, i.e.
+ // -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
+ // splice predicate. However, we can only do this if we can guarantee that
+ // there are enough elements in the vector, hence we check the index <= min
+ // number of elements.
+ Optional<unsigned> PredPattern;
+ if (Ty.isScalableVector() && IdxVal < 0 &&
+ (PredPattern = getSVEPredPatternFromNumElements(std::abs(IdxVal))) !=
+ None) {
+ SDLoc DL(Op);
+
+ // Create a predicate where all but the last -IdxVal elements are false.
+ EVT PredVT = Ty.changeVectorElementType(MVT::i1);
+ SDValue Pred = getPTrue(DAG, DL, PredVT, *PredPattern);
+ Pred = DAG.getNode(ISD::VECTOR_REVERSE, DL, PredVT, Pred);
+
+ // Now splice the two inputs together using the predicate.
+ return DAG.getNode(AArch64ISD::SPLICE, DL, Ty, Pred, Op.getOperand(0),
+ Op.getOperand(1));
+ }
// This will select to an EXT instruction, which has a maximum immediate
// value of 255, hence 2048-bits is the maximum value we can lower.
- if (Idx.sge(-1) && Idx.slt(2048 / Ty.getVectorElementType().getSizeInBits()))
+ if (IdxVal >= 0 &&
+ IdxVal < int64_t(2048 / Ty.getVectorElementType().getSizeInBits()))
return Op;
return SDValue();
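
A worked scalar model of the splice lowering described above, using four-element arrays in place of scalable vectors: for IdxVal = -2, ptrue vl2 gives {1,1,0,0}, reversing gives {0,0,1,1}, and SPLICE then produces the last two elements of the first input followed by the leading elements of the second. The helper below is illustrative only and assumes the contiguous predicates this lowering creates.

#include <cstddef>
#include <vector>

std::vector<int> modelSplice(const std::vector<bool> &Pred,
                             const std::vector<int> &A,
                             const std::vector<int> &B) {
  std::vector<int> Out;
  for (size_t I = 0; I < Pred.size(); ++I)
    if (Pred[I])
      Out.push_back(A[I]);       // active (trailing) elements of the first input
  for (size_t I = 0; Out.size() < A.size(); ++I)
    Out.push_back(B[I]);         // then leading elements of the second input
  return Out;
}

// modelSplice({0,0,1,1}, {a0,a1,a2,a3}, {b0,b1,b2,b3}) == {a2,a3,b0,b1}
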
@@ -8227,7 +8290,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
} else {
// Return LR, which contains the return address. Mark it an implicit
// live-in.
- unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
+ Register Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
@@ -9631,14 +9694,12 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
MVT CastVT;
if (getScaledOffsetDup(V, Lane, CastVT)) {
V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
- } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ V.getOperand(0).getValueType().is128BitVector()) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
- auto VecVT = V.getOperand(0).getValueType();
- if (VecVT.isFixedLengthVector() && VecVT.getFixedSizeInBits() <= 128) {
- Lane += V.getConstantOperandVal(1);
- V = V.getOperand(0);
- }
+ Lane += V.getConstantOperandVal(1);
+ V = V.getOperand(0);
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
@@ -9925,7 +9986,7 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
if (ConstVal->isZero())
- return SDValue(DAG.getMachineNode(AArch64::PFALSE, dl, VT), 0);
+ return DAG.getNode(AArch64ISD::PFALSE, dl, VT);
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
}
@@ -10978,6 +11039,28 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
if (!isTypeLegal(VT))
return SDValue();
+ // Break down insert_subvector into simpler parts.
+ if (VT.getVectorElementType() == MVT::i1) {
+ unsigned NumElts = VT.getVectorMinNumElements();
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
+ DAG.getVectorIdxConstant(NumElts / 2, DL));
+ if (Idx < (NumElts / 2)) {
+ SDValue NewLo = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Lo, Vec1,
+ DAG.getVectorIdxConstant(Idx, DL));
+ return DAG.getNode(AArch64ISD::UZP1, DL, VT, NewLo, Hi);
+ } else {
+ SDValue NewHi =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Hi, Vec1,
+ DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
+ return DAG.getNode(AArch64ISD::UZP1, DL, VT, Lo, NewHi);
+ }
+ }
+
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
@@ -11012,10 +11095,10 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
if (Vec0.isUndef())
return Op;
- unsigned int PredPattern =
+ Optional<unsigned> PredPattern =
getSVEPredPatternFromNumElements(InVT.getVectorNumElements());
auto PredTy = VT.changeVectorElementType(MVT::i1);
- SDValue PTrue = getPTrue(DAG, DL, PredTy, PredPattern);
+ SDValue PTrue = getPTrue(DAG, DL, PredTy, *PredPattern);
SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);
return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);
}
@@ -11730,10 +11813,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::aarch64_ldxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
@@ -11741,10 +11824,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::aarch64_stxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
@@ -11772,7 +11855,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
return true;
}
@@ -11782,7 +11865,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
@@ -12320,7 +12403,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
Value *PTrue = nullptr;
if (UseScalable) {
- unsigned PgPattern =
+ Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(FVTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
@@ -12328,7 +12411,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
- ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), PgPattern);
+ ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
@@ -12500,7 +12583,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
Value *PTrue = nullptr;
if (UseScalable) {
- unsigned PgPattern =
+ Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
@@ -12509,7 +12592,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
- ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), PgPattern);
+ ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
@@ -12901,7 +12984,7 @@ bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
- return (Index == 0 || Index == ResVT.getVectorNumElements());
+ return (Index == 0 || Index == ResVT.getVectorMinNumElements());
}
/// Turn vector tests of the signbit in the form of:
@@ -14261,6 +14344,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
static SDValue
performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc DL(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
uint64_t IdxVal = N->getConstantOperandVal(2);
@@ -14286,7 +14370,6 @@ performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// Fold insert_subvector -> concat_vectors
// insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
// insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
- SDLoc DL(N);
SDValue Lo, Hi;
if (IdxVal == 0) {
Lo = SubVec;
@@ -15004,7 +15087,15 @@ static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
Zero);
}
-static bool isAllActivePredicate(SDValue N) {
+static bool isAllInactivePredicate(SDValue N) {
+ // Look through cast.
+ while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
+ N = N.getOperand(0);
+
+ return N.getOpcode() == AArch64ISD::PFALSE;
+}
+
+static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
unsigned NumElts = N.getValueType().getVectorMinNumElements();
// Look through cast.
@@ -15023,6 +15114,21 @@ static bool isAllActivePredicate(SDValue N) {
N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
return N.getValueType().getVectorMinNumElements() >= NumElts;
+ // If we're compiling for a specific vector-length, we can check if the
+ // pattern's VL equals that of the scalable vector at runtime.
+ if (N.getOpcode() == AArch64ISD::PTRUE) {
+ const auto &Subtarget =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+ unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+ unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+ if (MaxSVESize && MinSVESize == MaxSVESize) {
+ unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
+ unsigned PatNumElts =
+ getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
+ return PatNumElts == (NumElts * VScale);
+ }
+ }
+
return false;
}
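
A numeric sketch of the check added above, assuming the vector length is pinned (for example -msve-vector-bits=256, so the minimum and maximum SVE sizes agree): vscale is then 256 / 128 = 2, and a PTRUE with pattern vl8 on an nxv4i1 predicate covers 4 * 2 = 8 lanes, so it is all-active. The helper name and parameters below are hypothetical.

bool ptruePatternIsAllActive(unsigned MinSVESize, unsigned MaxSVESize,
                             unsigned MinNumElts, unsigned PatNumElts) {
  const unsigned SVEBitsPerBlock = 128;
  if (!MaxSVESize || MinSVESize != MaxSVESize)
    return false;                            // exact vector length unknown
  unsigned VScale = MaxSVESize / SVEBitsPerBlock;
  return PatNumElts == MinNumElts * VScale;  // pattern covers every lane
}
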
@@ -15039,7 +15145,7 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
// ISD way to specify an all active predicate.
- if (isAllActivePredicate(Pg)) {
+ if (isAllActivePredicate(DAG, Pg)) {
if (UnpredOp)
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
@@ -15870,7 +15976,7 @@ static SDValue performPostLD1Combine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- if (VT.isScalableVector())
+ if (!VT.is128BitVector() && !VT.is64BitVector())
return SDValue();
unsigned LoadIdx = IsLaneOp ? 1 : 0;
@@ -16710,6 +16816,12 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
+ if (isAllActivePredicate(DAG, N0))
+ return N->getOperand(1);
+
+ if (isAllInactivePredicate(N0))
+ return N->getOperand(2);
+
// Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires less instructions for the
// supported types.
@@ -18753,7 +18865,7 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
- unsigned PgPattern =
+ Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(VT.getVectorNumElements());
assert(PgPattern && "Unexpected element count for SVE predicate");
@@ -18789,7 +18901,7 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
break;
}
- return getPTrue(DAG, DL, MaskVT, PgPattern);
+ return getPTrue(DAG, DL, MaskVT, *PgPattern);
}
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
@@ -19281,7 +19393,12 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
default:
return SDValue();
case ISD::VECREDUCE_OR:
- return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
+ if (isAllActivePredicate(DAG, Pg))
+ // The predicate can be 'Op' because
+ // vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
+ return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
+ else
+ return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
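
The identity behind the VECREDUCE_OR special case above, vecreduce_or(Op & all-true) <=> vecreduce_or(Op), in a scalar boolean model (illustration only): with an all-true governing predicate, "any active lane of Op" is the same question as "any set lane of Op", which is why Op can serve as its own predicate in the PTEST.

#include <cstddef>
#include <vector>

// anyActive(Pg, Op): does any lane have both the predicate and the operand set?
bool anyActive(const std::vector<bool> &Pg, const std::vector<bool> &Op) {
  for (size_t I = 0; I < Op.size(); ++I)
    if (Pg[I] && Op[I])
      return true;
  return false;
}
// For an all-true Pg, anyActive(Pg, Op) == anyActive(Op, Op) for every Op.
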
@@ -19725,8 +19842,9 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
return Op;
}
-bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
- return ::isAllActivePredicate(N);
+bool AArch64TargetLowering::isAllActivePredicate(SelectionDAG &DAG,
+ SDValue N) const {
+ return ::isAllActivePredicate(DAG, N);
}
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
@@ -19777,7 +19895,7 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
-bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
+bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 367ba3039a0c..ca6c70297c0b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -77,14 +77,16 @@ enum NodeType : unsigned {
SBC, // adc, sbc instructions
// Predicated instructions where inactive lanes produce undefined results.
+ ABDS_PRED,
+ ABDU_PRED,
ADD_PRED,
FADD_PRED,
FDIV_PRED,
FMA_PRED,
- FMAXNM_PRED,
- FMINNM_PRED,
FMAX_PRED,
+ FMAXNM_PRED,
FMIN_PRED,
+ FMINNM_PRED,
FMUL_PRED,
FSUB_PRED,
MUL_PRED,
@@ -321,6 +323,7 @@ enum NodeType : unsigned {
INSR,
PTEST,
PTRUE,
+ PFALSE,
BITREVERSE_MERGE_PASSTHRU,
BSWAP_MERGE_PASSTHRU,
@@ -487,7 +490,6 @@ const unsigned RoundingBitsPos = 22;
} // namespace AArch64
class AArch64Subtarget;
-class AArch64TargetMachine;
class AArch64TargetLowering : public TargetLowering {
public:
@@ -842,7 +844,7 @@ public:
return 128;
}
- bool isAllActivePredicate(SDValue N) const;
+ bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
EVT getPromotedVTForPredicate(EVT VT) const;
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
@@ -1137,8 +1139,8 @@ private:
// with BITCAST used otherwise.
SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
- bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
- LLT Ty2) const override;
+ bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
+ LLT Ty2) const override;
};
namespace AArch64 {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 84573dac7e41..b220929514f9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -102,6 +102,34 @@ def : Pat<(relaxed_load<atomic_load_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(LDURXi GPR64sp:$Rn, simm9:$offset)>;
+// FP 32-bit loads
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend))))),
+ (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend))))),
+ (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
+                                                                    uimm12s4:$offset))))),
+          (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+ (LDURSi GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit loads
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend))))),
+ (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend))))),
+ (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
+ uimm12s8:$offset))))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
//===----------------------------------
// Atomic stores
//===----------------------------------
@@ -196,6 +224,38 @@ def : Pat<(relaxed_store<atomic_store_64>
(am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
(STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
+// FP 32-bit stores
+def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend),
+ (i32 (bitconvert (f32 FPR32Op:$val)))),
+ (STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend),
+ (i32 (bitconvert (f32 FPR32Op:$val)))),
+ (STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+ (STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+ (STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit stores
+def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend),
+ (i64 (bitconvert (f64 FPR64Op:$val)))),
+ (STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend),
+ (i64 (bitconvert (f64 FPR64Op:$val)))),
+ (STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+           (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(relaxed_store<atomic_store_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+ (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
//===----------------------------------
// Low-level exclusive operations
//===----------------------------------
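
A small C++ illustration of the code the FP load/store patterns added to this file are aimed at (an assumption about typical sources, not taken from the patch): a relaxed std::atomic<float> or std::atomic<double> access lowers to a monotonic integer load or store plus a bitcast, which those patterns can now select directly to an FP load/store instead of a GPR access followed by an FMOV.

#include <atomic>

float loadRelaxed(const std::atomic<float> &A) {
  return A.load(std::memory_order_relaxed);   // i32 atomic load + bitcast to f32
}

void storeRelaxed(std::atomic<double> &A, double V) {
  A.store(V, std::memory_order_relaxed);      // bitcast to i64 + i64 atomic store
}
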
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f8d492188744..4c1e41b7efee 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1816,10 +1816,10 @@ def am_brcond : Operand<OtherVT> {
let OperandType = "OPERAND_PCREL";
}
-class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target),
- "b", ".$cond\t$target", "",
- [(AArch64brcond bb:$target, imm:$cond, NZCV)]>,
- Sched<[WriteBr]> {
+class BranchCond<bit bit4, string mnemonic>
+ : I<(outs), (ins ccode:$cond, am_brcond:$target),
+ mnemonic, ".$cond\t$target", "",
+ [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, Sched<[WriteBr]> {
let isBranch = 1;
let isTerminator = 1;
let Uses = [NZCV];
@@ -1828,7 +1828,7 @@ class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target),
bits<19> target;
let Inst{31-24} = 0b01010100;
let Inst{23-5} = target;
- let Inst{4} = 0;
+ let Inst{4} = bit4;
let Inst{3-0} = cond;
}
@@ -7700,10 +7700,10 @@ multiclass SIMDTableLookupTied<bit op, string asm> {
//----------------------------------------------------------------------------
-// AdvSIMD scalar CPY
+// AdvSIMD scalar DUP
//----------------------------------------------------------------------------
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
+class BaseSIMDScalarDUP<RegisterClass regtype, RegisterOperand vectype,
string asm, string kind, Operand idxtype>
: I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm,
"{\t$dst, $src" # kind # "$idx" #
@@ -7717,30 +7717,30 @@ class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
let Inst{4-0} = dst;
}
-class SIMDScalarCPYAlias<string asm, string size, Instruction inst,
+class SIMDScalarDUPAlias<string asm, string size, Instruction inst,
RegisterClass regtype, RegisterOperand vectype, Operand idxtype>
: InstAlias<asm # "{\t$dst, $src" # size # "$index"
# "|\t$dst, $src$index}",
(inst regtype:$dst, vectype:$src, idxtype:$index), 0>;
-multiclass SIMDScalarCPY<string asm> {
- def i8 : BaseSIMDScalarCPY<FPR8, V128, asm, ".b", VectorIndexB> {
+multiclass SIMDScalarDUP<string asm> {
+ def i8 : BaseSIMDScalarDUP<FPR8, V128, asm, ".b", VectorIndexB> {
bits<4> idx;
let Inst{20-17} = idx;
let Inst{16} = 1;
}
- def i16 : BaseSIMDScalarCPY<FPR16, V128, asm, ".h", VectorIndexH> {
+ def i16 : BaseSIMDScalarDUP<FPR16, V128, asm, ".h", VectorIndexH> {
bits<3> idx;
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
}
- def i32 : BaseSIMDScalarCPY<FPR32, V128, asm, ".s", VectorIndexS> {
+ def i32 : BaseSIMDScalarDUP<FPR32, V128, asm, ".s", VectorIndexS> {
bits<2> idx;
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
}
- def i64 : BaseSIMDScalarCPY<FPR64, V128, asm, ".d", VectorIndexD> {
+ def i64 : BaseSIMDScalarDUP<FPR64, V128, asm, ".d", VectorIndexD> {
bits<1> idx;
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
@@ -7751,16 +7751,16 @@ multiclass SIMDScalarCPY<string asm> {
(!cast<Instruction>(NAME # i64) V128:$src, VectorIndexD:$idx)>;
// 'DUP' mnemonic aliases.
- def : SIMDScalarCPYAlias<"dup", ".b",
+ def : SIMDScalarDUPAlias<"dup", ".b",
!cast<Instruction>(NAME#"i8"),
FPR8, V128, VectorIndexB>;
- def : SIMDScalarCPYAlias<"dup", ".h",
+ def : SIMDScalarDUPAlias<"dup", ".h",
!cast<Instruction>(NAME#"i16"),
FPR16, V128, VectorIndexH>;
- def : SIMDScalarCPYAlias<"dup", ".s",
+ def : SIMDScalarDUPAlias<"dup", ".s",
!cast<Instruction>(NAME#"i32"),
FPR32, V128, VectorIndexS>;
- def : SIMDScalarCPYAlias<"dup", ".d",
+ def : SIMDScalarDUPAlias<"dup", ".d",
!cast<Instruction>(NAME#"i64"),
FPR64, V128, VectorIndexD>;
}
@@ -10556,40 +10556,30 @@ class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
pattern> {
}
multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
- SDPatternOperator Accum> {
+ SDPatternOperator op> {
def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h",
[(set (v4i16 V64:$dst),
- (Accum (v4i16 V64:$Rd),
- (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn),
- (v4i16 V64:$Rm)))))]>;
+ (v4i16 (op (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h",
[(set (v8i16 V128:$dst),
- (Accum (v8i16 V128:$Rd),
- (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn),
- (v8i16 V128:$Rm)))))]>;
+ (v8i16 (op (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s",
[(set (v2i32 V64:$dst),
- (Accum (v2i32 V64:$Rd),
- (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn),
- (v2i32 V64:$Rm)))))]>;
+ (v2i32 (op (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s",
[(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn),
- (v4i32 V128:$Rm)))))]>;
+ (v4i32 (op (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
}
multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
- SDPatternOperator Accum> {
+ SDPatternOperator op> {
def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
V64, V64, V128_lo, VectorIndexH,
asm, ".4h", ".4h", ".4h", ".h",
[(set (v4i16 V64:$dst),
- (Accum (v4i16 V64:$Rd),
- (v4i16 (int_aarch64_neon_sqrdmulh
- (v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
+ (v4i16 (op (v4i16 V64:$Rd), (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx)))))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -10600,11 +10590,9 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
V128, V128, V128_lo, VectorIndexH,
asm, ".8h", ".8h", ".8h", ".h",
[(set (v8i16 V128:$dst),
- (Accum (v8i16 V128:$Rd),
- (v8i16 (int_aarch64_neon_sqrdmulh
- (v8i16 V128:$Rn),
- (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
+ (v8i16 (op (v8i16 V128:$Rd), (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx)))))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -10615,75 +10603,26 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
V64, V64, V128, VectorIndexS,
asm, ".2s", ".2s", ".2s", ".s",
[(set (v2i32 V64:$dst),
- (Accum (v2i32 V64:$Rd),
- (v2i32 (int_aarch64_neon_sqrdmulh
- (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
+ (v2i32 (op (v2i32 V64:$Rd), (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
}
- // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
- // an intermediate EXTRACT_SUBREG would be untyped.
- // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we
- // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..)))
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
- (i32 (vector_extract
- (v4i32 (insert_subvector
- (undef),
- (v2i32 (int_aarch64_neon_sqrdmulh
- (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32
- (v4i32 V128:$Rm),
- VectorIndexS:$idx)))),
- (i64 0))),
- (i64 0))))),
- (EXTRACT_SUBREG
- (v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
- (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- FPR32Op:$Rd,
- ssub)),
- V64:$Rn,
- V128:$Rm,
- VectorIndexS:$idx)),
- ssub)>;
-
def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
V128, V128, V128, VectorIndexS,
asm, ".4s", ".4s", ".4s", ".s",
[(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_aarch64_neon_sqrdmulh
- (v4i32 V128:$Rn),
- (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
+ (v4i32 (op (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
}
- // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
- // an intermediate EXTRACT_SUBREG would be untyped.
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
- (i32 (vector_extract
- (v4i32 (int_aarch64_neon_sqrdmulh
- (v4i32 V128:$Rn),
- (v4i32 (AArch64duplane32
- (v4i32 V128:$Rm),
- VectorIndexS:$idx)))),
- (i64 0))))),
- (EXTRACT_SUBREG
- (v4i32 (!cast<Instruction>(NAME # v4i32_indexed)
- (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
- FPR32Op:$Rd,
- ssub)),
- V128:$Rn,
- V128:$Rm,
- VectorIndexS:$idx)),
- ssub)>;
-
def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
FPR16Op, FPR16Op, V128_lo,
VectorIndexH, asm, ".h", "", "", ".h",
@@ -10698,11 +10637,9 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
FPR32Op, FPR32Op, V128, VectorIndexS,
asm, ".s", "", "", ".s",
[(set (i32 FPR32Op:$dst),
- (Accum (i32 FPR32Op:$Rd),
- (i32 (int_aarch64_neon_sqrdmulh
- (i32 FPR32Op:$Rn),
- (i32 (vector_extract (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
+ (i32 (op (i32 FPR32Op:$Rd), (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -11430,6 +11367,123 @@ class Store64BV<bits<3> opc, string asm_inst, list<dag> pat = []>
let Inst{20-16} = Rs;
}
+class MOPSMemoryCopyMoveBase<bit isMove, bits<2> opcode, bits<2> op1,
+ bits<2> op2, string asm>
+ : I<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
+ asm, "\t[$Rd]!, [$Rs]!, $Rn!",
+ "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb", []>,
+ Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rs;
+ bits<5> Rn;
+ let Inst{31-27} = 0b00011;
+ let Inst{26} = isMove;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opcode;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rs;
+ let Inst{15-14} = op2;
+ let Inst{13-12} = op1;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeCPYMemOpInstruction";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+class MOPSMemoryCopy<bits<2> opcode, bits<2> op1, bits<2> op2, string asm>
+ : MOPSMemoryCopyMoveBase<0, opcode, op1, op2, asm>;
+
+class MOPSMemoryMove<bits<2> opcode, bits<2> op1, bits<2> op2, string asm>
+ : MOPSMemoryCopyMoveBase<1, opcode, op1, op2, asm>;
+
+class MOPSMemorySetBase<bit isTagging, bits<2> opcode, bit op1, bit op2,
+ string asm>
+ : I<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
+ (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
+ asm, "\t[$Rd]!, $Rn!, $Rm",
+ "$Rd = $Rd_wb,$Rn = $Rn_wb", []>,
+ Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-27} = 0b00011;
+ let Inst{26} = isTagging;
+ let Inst{25-21} = 0b01110;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = opcode;
+ let Inst{13} = op2;
+ let Inst{12} = op1;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeSETMemOpInstruction";
+ let mayLoad = 0;
+ let mayStore = 1;
+}
+
+class MOPSMemorySet<bits<2> opcode, bit op1, bit op2, string asm>
+ : MOPSMemorySetBase<0, opcode, op1, op2, asm>;
+
+class MOPSMemorySetTagging<bits<2> opcode, bit op1, bit op2, string asm>
+ : MOPSMemorySetBase<1, opcode, op1, op2, asm>;
+
+multiclass MOPSMemoryCopyInsns<bits<2> opcode, string asm> {
+ def "" : MOPSMemoryCopy<opcode, 0b00, 0b00, asm>;
+ def WN : MOPSMemoryCopy<opcode, 0b00, 0b01, asm # "wn">;
+ def RN : MOPSMemoryCopy<opcode, 0b00, 0b10, asm # "rn">;
+ def N : MOPSMemoryCopy<opcode, 0b00, 0b11, asm # "n">;
+ def WT : MOPSMemoryCopy<opcode, 0b01, 0b00, asm # "wt">;
+ def WTWN : MOPSMemoryCopy<opcode, 0b01, 0b01, asm # "wtwn">;
+ def WTRN : MOPSMemoryCopy<opcode, 0b01, 0b10, asm # "wtrn">;
+ def WTN : MOPSMemoryCopy<opcode, 0b01, 0b11, asm # "wtn">;
+ def RT : MOPSMemoryCopy<opcode, 0b10, 0b00, asm # "rt">;
+ def RTWN : MOPSMemoryCopy<opcode, 0b10, 0b01, asm # "rtwn">;
+ def RTRN : MOPSMemoryCopy<opcode, 0b10, 0b10, asm # "rtrn">;
+ def RTN : MOPSMemoryCopy<opcode, 0b10, 0b11, asm # "rtn">;
+ def T : MOPSMemoryCopy<opcode, 0b11, 0b00, asm # "t">;
+ def TWN : MOPSMemoryCopy<opcode, 0b11, 0b01, asm # "twn">;
+ def TRN : MOPSMemoryCopy<opcode, 0b11, 0b10, asm # "trn">;
+ def TN : MOPSMemoryCopy<opcode, 0b11, 0b11, asm # "tn">;
+}
+
+multiclass MOPSMemoryMoveInsns<bits<2> opcode, string asm> {
+ def "" : MOPSMemoryMove<opcode, 0b00, 0b00, asm>;
+ def WN : MOPSMemoryMove<opcode, 0b00, 0b01, asm # "wn">;
+ def RN : MOPSMemoryMove<opcode, 0b00, 0b10, asm # "rn">;
+ def N : MOPSMemoryMove<opcode, 0b00, 0b11, asm # "n">;
+ def WT : MOPSMemoryMove<opcode, 0b01, 0b00, asm # "wt">;
+ def WTWN : MOPSMemoryMove<opcode, 0b01, 0b01, asm # "wtwn">;
+ def WTRN : MOPSMemoryMove<opcode, 0b01, 0b10, asm # "wtrn">;
+ def WTN : MOPSMemoryMove<opcode, 0b01, 0b11, asm # "wtn">;
+ def RT : MOPSMemoryMove<opcode, 0b10, 0b00, asm # "rt">;
+ def RTWN : MOPSMemoryMove<opcode, 0b10, 0b01, asm # "rtwn">;
+ def RTRN : MOPSMemoryMove<opcode, 0b10, 0b10, asm # "rtrn">;
+ def RTN : MOPSMemoryMove<opcode, 0b10, 0b11, asm # "rtn">;
+ def T : MOPSMemoryMove<opcode, 0b11, 0b00, asm # "t">;
+ def TWN : MOPSMemoryMove<opcode, 0b11, 0b01, asm # "twn">;
+ def TRN : MOPSMemoryMove<opcode, 0b11, 0b10, asm # "trn">;
+ def TN : MOPSMemoryMove<opcode, 0b11, 0b11, asm # "tn">;
+}
+
+multiclass MOPSMemorySetInsns<bits<2> opcode, string asm> {
+ def "" : MOPSMemorySet<opcode, 0, 0, asm>;
+ def T : MOPSMemorySet<opcode, 1, 0, asm # "t">;
+ def N : MOPSMemorySet<opcode, 0, 1, asm # "n">;
+ def TN : MOPSMemorySet<opcode, 1, 1, asm # "tn">;
+}
+
+multiclass MOPSMemorySetTaggingInsns<bits<2> opcode, string asm> {
+ def "" : MOPSMemorySetTagging<opcode, 0, 0, asm>;
+ def T : MOPSMemorySetTagging<opcode, 1, 0, asm # "t">;
+ def N : MOPSMemorySetTagging<opcode, 0, 1, asm # "n">;
+ def TN : MOPSMemorySetTagging<opcode, 1, 1, asm # "tn">;
+}
+
//----------------------------------------------------------------------------
// Allow the size specifier tokens to be upper case, not just lower.
def : TokenAlias<".4B", ".4b">; // Add dot product
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5fc5e4e5eb35..93c17133c845 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2574,6 +2574,7 @@ AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
AM.BaseReg = Base->getReg();
AM.Displacement = Offset;
AM.ScaledReg = 0;
+ AM.Scale = 0;
return AM;
}
@@ -7350,8 +7351,7 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
.setMIFlags(MachineInstr::FrameSetup);
// If v8.3a features are available we can replace a RET instruction by
- // RETAA or RETAB and omit the AUT instructions. In this case the
- // DW_CFA_AARCH64_negate_ra_state can't be emitted.
+  // RETAA or RETAB and omit the AUT instructions.
if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
@@ -7364,11 +7364,6 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
: AArch64::AUTIBSP))
.setMIFlag(MachineInstr::FrameDestroy);
- unsigned CFIIndexAuth =
- MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
- BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndexAuth)
- .setMIFlags(MachineInstr::FrameDestroy);
}
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index b2f9e82a7e8b..1054bea40e68 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -26,7 +26,6 @@
namespace llvm {
class AArch64Subtarget;
-class AArch64TargetMachine;
static const MachineMemOperand::Flags MOSuppressPair =
MachineMemOperand::MOTargetFlag1;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ebccc07edc7a..c8a697c8b82f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -33,6 +33,8 @@ def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
+def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
+ AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">;
@@ -198,6 +200,10 @@ def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
+def HasHBC : Predicate<"Subtarget->hasHBC()">,
+ AssemblerPredicate<(all_of FeatureHBC), "hbc">;
+def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
+ AssemblerPredicate<(all_of FeatureMOPS), "mops">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -2362,7 +2368,12 @@ def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
-def Bcc : BranchCond;
+def Bcc : BranchCond<0, "b">;
+
+// Armv8.8-A variant form which hints to the branch predictor that
+// this branch is very likely to go the same way nearly all the time
+// (even though it is not known at compile time _which_ way that is).
+def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
@@ -4500,9 +4511,9 @@ defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
- int_aarch64_neon_sqadd>;
+ int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
- int_aarch64_neon_sqsub>;
+ int_aarch64_neon_sqrdmlsh>;
// Extra saturate patterns, other than the intrinsics matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
@@ -4769,15 +4780,11 @@ defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
- def : Pat<(i32 (int_aarch64_neon_sqadd
- (i32 FPR32:$Rd),
- (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
- (i32 FPR32:$Rm))))),
+ def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))),
(SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
- def : Pat<(i32 (int_aarch64_neon_sqsub
- (i32 FPR32:$Rd),
- (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
- (i32 FPR32:$Rm))))),
+ def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))),
(SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}
@@ -5342,19 +5349,6 @@ def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
(v2i32 (trunc (v2i64 V128:$Vm))))),
(UZP1v4i32 V128:$Vn, V128:$Vm)>;
-def : Pat<(v16i8 (concat_vectors
- (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
- (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
- (UZP2v16i8 V128:$Vn, V128:$Vm)>;
-def : Pat<(v8i16 (concat_vectors
- (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
- (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
- (UZP2v8i16 V128:$Vn, V128:$Vm)>;
-def : Pat<(v4i32 (concat_vectors
- (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
- (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
- (UZP2v4i32 V128:$Vn, V128:$Vm)>;
-
//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------
@@ -5376,10 +5370,10 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
//----------------------------------------------------------------------------
-// AdvSIMD scalar CPY instruction
+// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------
-defm CPY : SIMDScalarCPY<"mov">;
+defm DUP : SIMDScalarDUP<"mov">;
//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
@@ -5790,7 +5784,7 @@ defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
// Floating point vector extractions are codegen'd as either a sequence of
-// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
@@ -5803,13 +5797,13 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
- (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
+ (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
- (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
+ (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
- (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
+ (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
- (bf16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
+ (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
@@ -6407,9 +6401,9 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
- int_aarch64_neon_sqadd>;
+ int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
- int_aarch64_neon_sqsub>;
+ int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
@@ -6425,6 +6419,22 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
VectorIndexS:$idx)),
(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
+// Match add node and also treat an 'or' node as an 'add' if the or'ed operands
+// have no common bits.
+def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
+ [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
+ if (N->getOpcode() == ISD::ADD)
+ return true;
+ return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
+}]> {
+ let GISelPredicateCode = [{
+ // Only handle G_ADD for now. FIXME: add the capability to compute whether
+ // operands of G_OR have common bits set or not.
+ return MI.getOpcode() == TargetOpcode::G_ADD;
+ }];
+}
+
+
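A minimal standalone C++ sketch of the property the PatFrags above relies on (helper name and values are illustrative, not part of LLVM): when two operands share no set bits, OR and ADD are interchangeable, so an 'or' produced for disjoint bitfields can be matched by the add-based accumulate patterns below.

#include <cassert>
#include <cstdint>

// When A and B have no common set bits there are no carries, so A | B == A + B.
static bool orActsAsAdd(uint64_t A, uint64_t B) { return (A & B) == 0; }

int main() {
  uint64_t Acc = 0xabc0000000000000ULL;     // high bits only
  uint64_t Shifted = 0x0000000000000123ULL; // low bits only
  assert(orActsAsAdd(Acc, Shifted));
  assert((Acc | Shifted) == (Acc + Shifted));
  return 0;
}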
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
@@ -6530,7 +6540,7 @@ defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
(AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
- TriOpFrag<(add node:$LHS,
+ TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
@@ -6543,7 +6553,7 @@ defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
(AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
- TriOpFrag<(add node:$LHS,
+ TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vlshr node:$MHS, node:$RHS))>>;
//----------------------------------------------------------------------------
@@ -6585,7 +6595,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
- TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
+ TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
@@ -6601,7 +6611,7 @@ defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
- TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
+ TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
@@ -8106,7 +8116,7 @@ class NTStore128Pat<ValueType VT> :
Pat<(nontemporalstore (VT FPR128:$Rt),
(am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
(STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
- (CPYi64 FPR128:$Rt, (i64 1)),
+ (DUPi64 FPR128:$Rt, (i64 1)),
GPR64sp:$Rn, simm7s8:$offset)>;
def : NTStore128Pat<v2i64>;
@@ -8118,7 +8128,7 @@ class NTStore64Pat<ValueType VT> :
Pat<(nontemporalstore (VT FPR64:$Rt),
(am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
(STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
- (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+ (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
GPR64sp:$Rn, simm7s4:$offset)>;
// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
@@ -8319,6 +8329,26 @@ let Predicates = [HasLS64] in {
def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
+let Predicates = [HasMOPS] in {
+ defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
+ defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
+ defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
+
+ defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
+ defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
+ defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
+
+ defm SETP : MOPSMemorySetInsns<0b00, "setp">;
+ defm SETM : MOPSMemorySetInsns<0b01, "setm">;
+ defm SETE : MOPSMemorySetInsns<0b10, "sete">;
+}
+let Predicates = [HasMOPS, HasMTE] in {
+ defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
+ defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
+ // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
+ defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
+}
+
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in
def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 3a836ac33064..6aefc1fdb599 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1139,7 +1139,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
? getLdStOffsetOp(*StoreI).getImm()
: getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;
- unsigned DestReg =
+ Register DestReg =
IsStoreXReg ? Register(TRI->getMatchingSuperReg(
LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
: LdRt;
diff --git a/llvm/lib/Target/AArch64/AArch64MCInstLower.h b/llvm/lib/Target/AArch64/AArch64MCInstLower.h
index 8f3148a98410..b008e49d52dd 100644
--- a/llvm/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/llvm/lib/Target/AArch64/AArch64MCInstLower.h
@@ -14,15 +14,12 @@
namespace llvm {
class AsmPrinter;
-class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
-class MachineModuleInfoMachO;
class MachineOperand;
-class Mangler;
/// AArch64MCInstLower - This class is used to lower an MachineInstr
/// into an MCInst.
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 42db18332f1c..1fc5617b49f6 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -11,12 +11,19 @@
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
+// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
+// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+//
+// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
+// MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. In this case, we could try to split the constant operand of mov
-// instruction into two bitmask immediates. It makes two AND instructions
-// intead of multiple `mov` + `and` instructions.
+// instruction into two immediates which can be directly encoded into
+// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
+// multiple `mov` + `and/add/sub` instructions.
//
-// 2. Remove redundant ORRWrs which is generated by zero-extend.
+// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
// %3:gpr32 = ORRWrs $wzr, %2, 0
// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
@@ -30,6 +37,7 @@
#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -48,11 +56,44 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
}
const AArch64InstrInfo *TII;
+ const AArch64RegisterInfo *TRI;
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;
template <typename T>
- bool visitAND(MachineInstr &MI,
+ using SplitAndOpcFunc =
+ std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
+ using BuildMIFunc =
+ std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
+ Register, Register)>;
+
+ /// For instructions where an immediate operand could be split into two
+ /// separate immediate instructions, use splitTwoPartImm to handle the
+ /// optimization.
+ ///
+ /// To implement, the following function types must be passed to
+ /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
+ /// splitting the immediate is valid and returns the associated new opcode. A
+ /// BuildMIFunc must be implemented to build the two immediate instructions.
+ ///
+ /// Example Pattern (where IMM would require 2+ MOV instructions):
+ /// %dst = <Instr>rr %src IMM [...]
+ /// becomes:
+ /// %tmp = <Instr>ri %src (encode half IMM) [...]
+ /// %dst = <Instr>ri %tmp (encode half IMM) [...]
+ template <typename T>
+ bool splitTwoPartImm(MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+ SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
+
+ bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
+ MachineInstr *&SubregToRegMI);
+
+ template <typename T>
+ bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+ template <typename T>
+ bool visitAND(unsigned Opc, MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
bool visitORR(MachineInstr &MI,
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
@@ -116,7 +157,8 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
- MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ unsigned Opc, MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
// Try below transformation.
//
// MOVi32imm + ANDWrr ==> ANDWri + ANDWri
@@ -127,23 +169,151 @@ bool AArch64MIPeepholeOpt::visitAND(
// bitmask immediates. It makes only two AND instructions intead of multiple
// mov + and instructions.
- unsigned RegSize = sizeof(T) * 8;
- assert((RegSize == 32 || RegSize == 64) &&
- "Invalid RegSize for AND bitmask peephole optimization");
+ return splitTwoPartImm<T>(
+ MI, ToBeRemoved,
+ [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
+ if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
+ return Opc;
+ return None;
+ },
+ [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+ unsigned Imm1, Register SrcReg, Register NewTmpReg,
+ Register NewDstReg) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ .addReg(SrcReg)
+ .addImm(Imm0);
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ .addReg(NewTmpReg)
+ .addImm(Imm1);
+ });
+}
+
+bool AArch64MIPeepholeOpt::visitORR(
+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ // Check this ORR comes from below zero-extend pattern.
+ //
+ // def : Pat<(i64 (zext GPR32:$src)),
+ // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
+ if (MI.getOperand(3).getImm() != 0)
+ return false;
+
+ if (MI.getOperand(1).getReg() != AArch64::WZR)
+ return false;
+
+ MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+ if (!SrcMI)
+ return false;
+
+ // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
+ //
+ // When you use the 32-bit form of an instruction, the upper 32 bits of the
+ // source registers are ignored and the upper 32 bits of the destination
+ // register are set to zero.
+ //
+ // If the source operand of the zero-extend is defined by a 32-bit form of an
+ // AArch64 instruction, the zero-extend is not needed. Check that the opcode is
+ // a real AArch64 instruction; if it is not, conservatively do not process it.
+ if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
+ return false;
+
+ Register DefReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
+ MRI->replaceRegWith(DefReg, SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ // replaceRegWith changes MI's definition register. Keep it for SSA form until
+ // deleting MI.
+ MI.getOperand(0).setReg(DefReg);
+ ToBeRemoved.insert(&MI);
+
+ LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
+
+ return true;
+}
+
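For reference, the architectural fact visitORR relies on can be shown with a small standalone C++ sketch (illustrative only; the 32-bit arithmetic stands in for any AArch64 32-bit register write):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0xdeadbeefcafef00dULL;
  // A 32-bit instruction form writes a W register and implicitly clears
  // bits [63:32] of the corresponding X register...
  uint32_t W = static_cast<uint32_t>(X) + 1;
  // ...which is exactly what a zext from i32 to i64 asks for, so the
  // ORRWrs copy emitted for the zero-extend adds nothing.
  uint64_t Zext = static_cast<uint64_t>(W);
  assert((Zext >> 32) == 0);
  return 0;
}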
+template <typename T>
+static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
+ // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
+ // imm0 and imm1 are non-zero 12-bit unsigned integers.
+ if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
+ (Imm & ~static_cast<T>(0xffffff)) != 0)
+ return false;
+
+ // The split is only profitable if the immediate cannot be materialized by a
+ // single instruction.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
+ if (Insn.size() == 1)
+ return false;
+
+ // Split Imm into (Imm0 << 12) + Imm1;
+ Imm0 = (Imm >> 12) & 0xfff;
+ Imm1 = Imm & 0xfff;
+ return true;
+}
+
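A standalone sketch of the split performed by splitAddSubImm above, with a worked value; the helper name and the example immediate are illustrative, and the profitability check against expandMOVImm is omitted here.

#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

// Split a 24-bit immediate into a shifted and an unshifted non-zero 12-bit
// half, the shape fed into two ADD/SUB *ri instructions.
static std::optional<std::pair<uint64_t, uint64_t>> splitImm24(uint64_t Imm) {
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || (Imm & ~0xffffffULL) != 0)
    return std::nullopt;
  return std::make_pair((Imm >> 12) & 0xfff, Imm & 0xfff);
}

int main() {
  auto Split = splitImm24(0x123456);
  assert(Split && Split->first == 0x123 && Split->second == 0x456);
  // Roughly: add x0, x0, #0x123, lsl #12
  //          add x0, x0, #0x456
  return 0;
}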
+template <typename T>
+bool AArch64MIPeepholeOpt::visitADDSUB(
+ unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ // Try below transformation.
+ //
+ // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
+ // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+ //
+ // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
+ // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+ //
+ // The mov pseudo instruction could be expanded to multiple mov instructions
+ // later. Let's try to split the constant operand of mov instruction into two
+ // legal add/sub immediates. It makes only two ADD/SUB instructions instead of
+ // multiple `mov` + `add/sub` instructions.
+
+ return splitTwoPartImm<T>(
+ MI, ToBeRemoved,
+ [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
+ T &Imm1) -> Optional<unsigned> {
+ if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
+ return PosOpc;
+ if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
+ return NegOpc;
+ return None;
+ },
+ [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+ unsigned Imm1, Register SrcReg, Register NewTmpReg,
+ Register NewDstReg) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ .addReg(SrcReg)
+ .addImm(Imm0)
+ .addImm(12);
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ .addReg(NewTmpReg)
+ .addImm(Imm1)
+ .addImm(0);
+ });
+}
- // Check whether AND's MBB is in loop and the AND is loop invariant.
+// Checks if the corresponding MOV immediate instruction is applicable for
+// this peephole optimization.
+bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
+ MachineInstr *&MovMI,
+ MachineInstr *&SubregToRegMI) {
+ // Check whether the current MBB is inside a loop and the MI is loop invariant.
MachineBasicBlock *MBB = MI.getParent();
MachineLoop *L = MLI->getLoopFor(MBB);
if (L && !L->isLoopInvariant(MI))
return false;
- // Check whether AND's operand is MOV with immediate.
- MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+ // Check whether current MI's operand is MOV with immediate.
+ MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
if (!MovMI)
return false;
- MachineInstr *SubregToRegMI = nullptr;
// If it is SUBREG_TO_REG, check its operand.
+ SubregToRegMI = nullptr;
if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
SubregToRegMI = MovMI;
MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
@@ -159,47 +329,63 @@ bool AArch64MIPeepholeOpt::visitAND(
// more instructions.
if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
return false;
-
if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
return false;
- // Split the bitmask immediate into two.
- T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
+ // It is OK to perform this peephole optimization.
+ return true;
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::splitTwoPartImm(
+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+ SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
+ unsigned RegSize = sizeof(T) * 8;
+ assert((RegSize == 32 || RegSize == 64) &&
+ "Invalid RegSize for legal immediate peephole optimization");
+
+ // Perform several essential checks against current MI.
+ MachineInstr *MovMI, *SubregToRegMI;
+ if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
+ return false;
+
+ // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
+ T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
// For the 32 bit form of instruction, the upper 32 bits of the destination
// register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
- // of UImm to zero.
+ // of Imm to zero. This is essential if the immediate value was negative, since
+ // it was sign-extended when assigned to the 64-bit Imm.
if (SubregToRegMI)
- UImm &= 0xFFFFFFFF;
- T Imm1Enc;
- T Imm2Enc;
- if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
+ Imm &= 0xFFFFFFFF;
+ unsigned Opcode;
+ if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
+ Opcode = R.getValue();
+ else
return false;
- // Create new AND MIs.
- DebugLoc DL = MI.getDebugLoc();
- const TargetRegisterClass *ANDImmRC =
- (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
+ // Create new ADD/SUB MIs.
+ MachineFunction *MF = MI.getMF();
+ const TargetRegisterClass *RC =
+ TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
+ const TargetRegisterClass *ORC =
+ TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
- Register NewDstReg = MRI->createVirtualRegister(ANDImmRC);
- unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
-
- MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
- .addReg(SrcReg)
- .addImm(Imm1Enc);
+ Register NewTmpReg = MRI->createVirtualRegister(RC);
+ Register NewDstReg = MRI->createVirtualRegister(RC);
+ MRI->constrainRegClass(SrcReg, RC);
+ MRI->constrainRegClass(NewTmpReg, ORC);
MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
- BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
- .addReg(NewTmpReg)
- .addImm(Imm2Enc);
+
+ BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
MRI->replaceRegWith(DstReg, NewDstReg);
// replaceRegWith changes MI's definition register. Keep it for SSA form until
// deleting MI.
MI.getOperand(0).setReg(DstReg);
+ // Record the MIs that need to be removed.
ToBeRemoved.insert(&MI);
if (SubregToRegMI)
ToBeRemoved.insert(SubregToRegMI);
@@ -208,59 +394,17 @@ bool AArch64MIPeepholeOpt::visitAND(
return true;
}
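The masking of Imm with 0xFFFFFFFF above matters because the immediate is sign-extended when read into the 64-bit variable; a minimal C++ illustration with an example value:

#include <cassert>
#include <cstdint>

int main() {
  int32_t MovImm = -4096; // e.g. what a 32-bit MOV immediate might carry
  // Reading it into a 64-bit variable sign-extends it...
  uint64_t Imm = static_cast<int64_t>(MovImm); // 0xfffffffffffff000
  assert(Imm != 0xfffff000ULL);
  // ...so when the value feeds a SUBREG_TO_REG of a 32-bit def, the upper
  // half must be cleared before splitting.
  Imm &= 0xFFFFFFFF;
  assert(Imm == 0xfffff000ULL);
  return 0;
}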
-bool AArch64MIPeepholeOpt::visitORR(
- MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
- // Check this ORR comes from below zero-extend pattern.
- //
- // def : Pat<(i64 (zext GPR32:$src)),
- // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
- if (MI.getOperand(3).getImm() != 0)
- return false;
-
- if (MI.getOperand(1).getReg() != AArch64::WZR)
- return false;
-
- MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
- if (!SrcMI)
- return false;
-
- // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
- //
- // When you use the 32-bit form of an instruction, the upper 32 bits of the
- // source registers are ignored and the upper 32 bits of the destination
- // register are set to zero.
- //
- // If AArch64's 32-bit form of instruction defines the source operand of
- // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
- // real AArch64 instruction and if it is not, do not process the opcode
- // conservatively.
- if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
- return false;
-
- Register DefReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(2).getReg();
- MRI->replaceRegWith(DefReg, SrcReg);
- MRI->clearKillFlags(SrcReg);
- // replaceRegWith changes MI's definition register. Keep it for SSA form until
- // deleting MI.
- MI.getOperand(0).setReg(DefReg);
- ToBeRemoved.insert(&MI);
-
- LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; });
-
- return true;
-}
-
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TRI = static_cast<const AArch64RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
MLI = &getAnalysis<MachineLoopInfo>();
MRI = &MF.getRegInfo();
- if (!MRI->isSSA())
- return false;
+ assert(MRI->isSSA() && "Expected to be run on SSA form!");
bool Changed = false;
SmallSetVector<MachineInstr *, 8> ToBeRemoved;
@@ -271,13 +415,30 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
default:
break;
case AArch64::ANDWrr:
- Changed = visitAND<uint32_t>(MI, ToBeRemoved);
+ Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved);
break;
case AArch64::ANDXrr:
- Changed = visitAND<uint64_t>(MI, ToBeRemoved);
+ Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved);
break;
case AArch64::ORRWrs:
Changed = visitORR(MI, ToBeRemoved);
+ break;
+ case AArch64::ADDWrr:
+ Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI,
+ ToBeRemoved);
+ break;
+ case AArch64::SUBWrr:
+ Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI,
+ ToBeRemoved);
+ break;
+ case AArch64::ADDXrr:
+ Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI,
+ ToBeRemoved);
+ break;
+ case AArch64::SUBXrr:
+ Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
+ ToBeRemoved);
+ break;
}
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index 80d98d17e1d6..2ef7bc83003a 100644
--- a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -633,7 +633,7 @@ bool AArch64SIMDInstrOpt::optimizeLdStInterleave(MachineInstr &MI) {
/// Return true when the instruction is processed successfully.
bool AArch64SIMDInstrOpt::processSeqRegInst(MachineInstr *DefiningMI,
unsigned* StReg, unsigned* StRegKill, unsigned NumArg) const {
- assert (DefiningMI != NULL);
+ assert(DefiningMI != nullptr);
if (DefiningMI->getOpcode() != AArch64::REG_SEQUENCE)
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index eb55a472a69a..73a680465f6f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -180,20 +180,22 @@ def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
-def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>;
-def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>;
def AArch64fmax_p : SDNode<"AArch64ISD::FMAX_PRED", SDT_AArch64Arith>;
+def AArch64fmaxnm_p : SDNode<"AArch64ISD::FMAXNM_PRED", SDT_AArch64Arith>;
def AArch64fmin_p : SDNode<"AArch64ISD::FMIN_PRED", SDT_AArch64Arith>;
+def AArch64fminnm_p : SDNode<"AArch64ISD::FMINNM_PRED", SDT_AArch64Arith>;
def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>;
def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>;
def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>;
+def AArch64sabd_p : SDNode<"AArch64ISD::ABDS_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
+def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
@@ -277,8 +279,11 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
return N->hasOneUse();
}]>;
+def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
+ (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
+
def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
- (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
+ (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
return N->getFlags().hasNoSignedZeros();
}]>;
@@ -415,6 +420,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>;
defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
+ defm SABD_ZPZZ : sve_int_bin_pred_bhsd<AArch64sabd_p>;
+ defm UABD_ZPZZ : sve_int_bin_pred_bhsd<AArch64uabd_p>;
defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", AArch64frecpe>;
defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>;
@@ -469,6 +476,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm FMINNM_ZPZZ : sve_fp_bin_pred_hfd<AArch64fminnm_p>;
defm FMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmax_p>;
defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>;
+ defm FABD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fabd_p>;
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
} // End HasSVEorStreamingSVE
@@ -642,11 +650,11 @@ let Predicates = [HasSVEorStreamingSVE] in {
(DUP_ZI_D $a, $b)>;
// Duplicate immediate FP into all vector elements.
- def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))),
+ def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))),
(DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
- def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))),
+ def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))),
(DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
- def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))),
+ def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))),
(DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
// Duplicate FP immediate into all vector elements
@@ -722,11 +730,11 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
- def PFALSE : sve_int_pfalse<0b000000, "pfalse">;
+ defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
- defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
+ defm AND_PPzPP : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>;
defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
@@ -1419,6 +1427,16 @@ let Predicates = [HasSVEorStreamingSVE] in {
(INSR_ZV_D ZPR:$Z2, (INSERT_SUBREG (IMPLICIT_DEF),
(LASTB_VPZ_D (PTRUE_D 31), ZPR:$Z1), dsub))>;
+ // Splice with a lane index greater than or equal to 0
+ def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
+ def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
+ def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
+ def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
+
defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
@@ -2496,6 +2514,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
// 16-element contiguous store
defm : st1<ST1B, ST1B_IMM, nxv16i8, AArch64st1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+ // Insert scalar into undef[0]
def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)),
(INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)),
@@ -2691,17 +2710,6 @@ let Predicates = [HasSVEorStreamingSVE] in {
def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
(f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
}
-
- // Splice with lane bigger or equal to 0
- def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
- def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
- def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
- def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, imm0_255:$index)>;
-
} // End HasSVEorStreamingSVE
let Predicates = [HasSVE, HasMatMulInt8] in {
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 877c4d2ced41..009219ce3c54 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -235,10 +235,14 @@ def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
//---
// Miscellaneous
//---
-def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W")>;
-def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS[^W]")>;
-def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)")>;
-def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ")>;
+def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>;
+def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>;
+def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>;
+def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>;
def : InstRW<[WriteI], (instrs COPY)>;
//---
// Vector Loads - 64-bit per cycle
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td
index 168a762241ca..a860aa907fd1 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -526,7 +526,7 @@ def : InstRW<[A57Write_5cyc_2V], (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
def : InstRW<[A57Write_3cyc_2V], (instregex "^(BIF|BIT|BSL|BSP)v16i8")>;
// ASIMD duplicate, gen reg, D-form and Q-form
-def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^CPY")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "^DUPv.+gpr")>;
// ASIMD move, saturating
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index 1d25a6c00f95..fa10d056b7f7 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -1891,7 +1891,7 @@ def : InstRW<[A64FXWrite_4Cyc_GI0],
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
-def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^CPY")>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;
// ASIMD extract
@@ -2512,16 +2512,16 @@ def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs CNTW_XPiI)>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs COMPACT_ZPZ_D, COMPACT_ZPZ_S)>;
// [72] "cpy $Zd, $Pg/m, $Rn";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmR_B, CPY_ZPmR_D, CPY_ZPmR_H, CPY_ZPmR_S)>;
// [73] "cpy $Zd, $Pg/m, $Vn";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmV_B, CPY_ZPmV_D, CPY_ZPmV_H, CPY_ZPmV_S)>;
// [74] "cpy $Zd, $Pg/m, $imm";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPmI_B, CPY_ZPmI_D, CPY_ZPmI_H, CPY_ZPmI_S)>;
// [75] "cpy $Zd, $Pg/z, $imm";
-//@@@ def : InstRW<[XXXXXX], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>;
+def : InstRW<[A64FXWrite_6Cyc_GI0], (instrs CPY_ZPzI_B, CPY_ZPzI_D, CPY_ZPzI_H, CPY_ZPzI_S)>;
// [76] "ctermeq $Rn, $Rm";
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs CTERMEQ_WW, CTERMEQ_XX)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 14df8236504b..d66efb82fccc 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -669,7 +669,7 @@ def : InstRW<[M3WriteNEONB], (instregex "^DUPv.+gpr")>;
def : InstRW<[M3WriteNSHF1], (instregex "^DUPv.+lane")>;
def : InstRW<[M3WriteNSHF1], (instregex "^EXTv")>;
def : InstRW<[M3WriteNSHF1], (instregex "^[SU]?Q?XTU?Nv")>;
-def : InstRW<[M3WriteNSHF1], (instregex "^CPY")>;
+def : InstRW<[M3WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[M3WriteNSHF1], (instregex "^INSv.+lane")>;
def : InstRW<[M3WriteMOVI], (instregex "^MOVI")>;
def : InstRW<[M3WriteNALU1], (instregex "^FMOVv")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index 8f740a9a0d35..94e70793e855 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -810,7 +810,7 @@ def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>;
def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL|BSP)v")>;
def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>;
def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>;
-def : InstRW<[M4WriteNSHF1], (instregex "^CPY")>;
+def : InstRW<[M4WriteNSHF1], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[M4WriteNSHF1], (instregex "^DUPv.+lane")>;
def : InstRW<[M4WriteNSHF1], (instregex "^EXTv")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^XTNv")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index 93e1b66bea03..1db5f5322a64 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -848,7 +848,7 @@ def : InstRW<[M5WriteNALU2], (instregex "^RBITv")>;
def : InstRW<[M5WriteNALU2], (instregex "^(BIF|BIT|BSL|BSP)v")>;
def : InstRW<[M5WriteNALU2], (instregex "^CL[STZ]v")>;
def : InstRW<[M5WriteNEONB], (instregex "^DUPv.+gpr")>;
-def : InstRW<[M5WriteNSHF2], (instregex "^CPY")>;
+def : InstRW<[M5WriteNSHF2], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[M5WriteNSHF2], (instregex "^DUPv.+lane")>;
def : InstRW<[M5WriteNSHF2], (instregex "^EXTv")>;
def : InstRW<[M5WriteNSHT4A], (instregex "^XTNv")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index f2cd83caffa2..a3a038f869fb 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -908,7 +908,7 @@ def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index e4cae97b5524..ffa0a5e7d91a 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1499,7 +1499,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;
// ASIMD extract
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
index 08be2b3a55b3..46a1c217f984 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
@@ -1608,7 +1608,7 @@ def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123],
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv")>;
-def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^CPY")>;
+def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv.+gpr")>;
// ASIMD extract
diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index 7307961ddb5f..87be7bb6d113 100644
--- a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -304,7 +304,7 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
// sure if that would actually result in a big performance difference
// though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
// already to do this - but it's unclear if that could easily be used here.
- unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+ Register TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
LLVM_DEBUG(dbgs() << "RS finds "
<< ((TmpReg == 0) ? "no register " : "register ");
if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index d2488f61eb4b..cae6d65bed2d 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -195,7 +195,7 @@ void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
for (auto *I : ReTags) {
- unsigned TaggedReg = I->getOperand(0).getReg();
+ Register TaggedReg = I->getOperand(0).getReg();
int FI = I->getOperand(1).getIndex();
uncheckUsesOf(TaggedReg, FI);
}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index f7d3dd0bc222..a4f4b8582182 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -157,13 +158,19 @@ void AArch64Subtarget::initializeProperties() {
break;
case NeoverseN1:
PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 5;
+ MaxBytesForLoopAlignment = 16;
break;
case NeoverseN2:
PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 5;
+ MaxBytesForLoopAlignment = 16;
VScaleForTuning = 1;
break;
case NeoverseV1:
PrefFunctionLogAlignment = 4;
+ PrefLoopLogAlignment = 5;
+ MaxBytesForLoopAlignment = 16;
VScaleForTuning = 2;
break;
case Neoverse512TVB:
@@ -228,8 +235,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
IsLittle(LittleEndian),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
- FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(),
+ InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
TLInfo(TM, *this) {
if (AArch64::isX18ReservedByDefault(TT))
ReserveXRegister.set(18);
@@ -367,9 +373,4 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
MFI.computeMaxCallFrameSize(MF);
}
-bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
- // Prefer NEON unless larger SVE registers are available.
- return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
-}
-
bool AArch64Subtarget::useAA() const { return UseAA; }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b3cd5ebd5f65..3e3c0f6aba15 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -94,9 +94,11 @@ protected:
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasV8_7aOps = false;
+ bool HasV8_8aOps = false;
bool HasV9_0aOps = false;
bool HasV9_1aOps = false;
bool HasV9_2aOps = false;
+ bool HasV9_3aOps = false;
bool HasV8_0rOps = false;
bool HasCONTEXTIDREL2 = false;
@@ -188,6 +190,10 @@ protected:
bool HasHCX = false;
bool HasLS64 = false;
+ // Armv8.8-A Extensions
+ bool HasHBC = false;
+ bool HasMOPS = false;
+
// Arm SVE2 extensions
bool HasSVE2 = false;
bool HasSVE2AES = false;
@@ -274,6 +280,7 @@ protected:
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
unsigned PrefFunctionLogAlignment = 0;
unsigned PrefLoopLogAlignment = 0;
+ unsigned MaxBytesForLoopAlignment = 0;
unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;
@@ -365,6 +372,7 @@ public:
bool hasV9_0aOps() const { return HasV9_0aOps; }
bool hasV9_1aOps() const { return HasV9_1aOps; }
bool hasV9_2aOps() const { return HasV9_2aOps; }
+ bool hasV9_3aOps() const { return HasV9_3aOps; }
bool hasV8_0rOps() const { return HasV8_0rOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -464,6 +472,10 @@ public:
}
unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
+ unsigned getMaxBytesForLoopAlignment() const {
+ return MaxBytesForLoopAlignment;
+ }
+
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
unsigned getWideningBaseCost() const { return WideningBaseCost; }
@@ -572,6 +584,8 @@ public:
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
bool hasEL2VMSA() const { return HasEL2VMSA; }
bool hasEL3() const { return HasEL3; }
+ bool hasHBC() const { return HasHBC; }
+ bool hasMOPS() const { return HasMOPS; }
bool fixCortexA53_835769() const { return FixCortexA53_835769; }
@@ -666,7 +680,10 @@ public:
return MinSVEVectorSizeInBits;
}
- bool useSVEForFixedLengthVectors() const;
+ bool useSVEForFixedLengthVectors() const {
+ // Prefer NEON unless larger SVE registers are available.
+ return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+ }
unsigned getVScaleForTuning() const { return VScaleForTuning; }
};
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index f9fe804865a5..cce5813fe6e9 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -1333,7 +1333,7 @@ def : RWSysReg<"PRBAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b000>;
def : RWSysReg<"PRLAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b001>;
def : RWSysReg<"PRLAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b001>;
-foreach n = 0-15 in {
+foreach n = 1-15 in {
foreach x = 1-2 in {
//Direct acces to Protection Region Base Address Register for n th MPU region
def : RWSysReg<!strconcat("PRBAR"#n, "_EL"#x),
@@ -1348,7 +1348,7 @@ foreach x = 1-2 in {
let Encoding{13} = !add(x,-1);
}
} //foreach x = 1-2 in
-} //foreach n = 0-15 in
+} //foreach n = 1-15 in
} //let Requires = [{ {AArch64::HasV8_0rOps} }] in
// v8.1a "Privileged Access Never" extension-specific system registers
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index 25e626134317..7d314bce99b1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -20,8 +20,6 @@
namespace llvm {
-class AArch64RegisterBankInfo;
-
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index dfc66f0cb4c1..7ed934cfabc0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -25,8 +25,7 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
SupportDebugThreadLocalLocation = false;
}
-AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
- : TargetLoweringObjectFileMachO() {
+AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile() {
SupportGOTPCRelWithOffset = false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
index 28324c2ae608..9f098230bbd7 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -13,7 +13,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
-class AArch64TargetMachine;
/// This implementation is used for AArch64 ELF targets (Linux in particular).
class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d21854e38f5a..a4d666a0a3c2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -331,6 +331,45 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
break;
}
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ static const CostTblEntry WithOverflowCostTbl[] = {
+ {Intrinsic::sadd_with_overflow, MVT::i8, 3},
+ {Intrinsic::uadd_with_overflow, MVT::i8, 3},
+ {Intrinsic::sadd_with_overflow, MVT::i16, 3},
+ {Intrinsic::uadd_with_overflow, MVT::i16, 3},
+ {Intrinsic::sadd_with_overflow, MVT::i32, 1},
+ {Intrinsic::uadd_with_overflow, MVT::i32, 1},
+ {Intrinsic::sadd_with_overflow, MVT::i64, 1},
+ {Intrinsic::uadd_with_overflow, MVT::i64, 1},
+ {Intrinsic::ssub_with_overflow, MVT::i8, 3},
+ {Intrinsic::usub_with_overflow, MVT::i8, 3},
+ {Intrinsic::ssub_with_overflow, MVT::i16, 3},
+ {Intrinsic::usub_with_overflow, MVT::i16, 3},
+ {Intrinsic::ssub_with_overflow, MVT::i32, 1},
+ {Intrinsic::usub_with_overflow, MVT::i32, 1},
+ {Intrinsic::ssub_with_overflow, MVT::i64, 1},
+ {Intrinsic::usub_with_overflow, MVT::i64, 1},
+ {Intrinsic::smul_with_overflow, MVT::i8, 5},
+ {Intrinsic::umul_with_overflow, MVT::i8, 4},
+ {Intrinsic::smul_with_overflow, MVT::i16, 5},
+ {Intrinsic::umul_with_overflow, MVT::i16, 4},
+ {Intrinsic::smul_with_overflow, MVT::i32, 2}, // eg umull;tst
+ {Intrinsic::umul_with_overflow, MVT::i32, 2}, // eg umull;cmp sxtw
+ {Intrinsic::smul_with_overflow, MVT::i64, 3}, // eg mul;smulh;cmp
+ {Intrinsic::umul_with_overflow, MVT::i64, 3}, // eg mul;umulh;cmp asr
+ };
+ EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
+ if (MTy.isSimple())
+ if (const auto *Entry = CostTableLookup(WithOverflowCostTbl, ICA.getID(),
+ MTy.getSimpleVT()))
+ return Entry->Cost;
+ break;
+ }
default:
break;
}
@@ -377,12 +416,76 @@ static Optional<Instruction *> processPhiNode(InstCombiner &IC,
return IC.replaceInstUsesWith(II, NPN);
}
+// (from_svbool (binop (to_svbool pred) (svbool_t _) (svbool_t _)))
+// => (binop (pred) (from_svbool _) (from_svbool _))
+//
+// The above transformation eliminates a `to_svbool` in the predicate
+// operand of bitwise operation `binop` by narrowing the vector width of
+// the operation. For example, it would convert a `<vscale x 16 x i1>
+// and` into a `<vscale x 4 x i1> and`. This is profitable because
+// to_svbool must zero the new lanes during widening, whereas
+// from_svbool is free.
+static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
+ IntrinsicInst &II) {
+ auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
+ if (!BinOp)
+ return None;
+
+ auto IntrinsicID = BinOp->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::aarch64_sve_and_z:
+ case Intrinsic::aarch64_sve_bic_z:
+ case Intrinsic::aarch64_sve_eor_z:
+ case Intrinsic::aarch64_sve_nand_z:
+ case Intrinsic::aarch64_sve_nor_z:
+ case Intrinsic::aarch64_sve_orn_z:
+ case Intrinsic::aarch64_sve_orr_z:
+ break;
+ default:
+ return None;
+ }
+
+ auto BinOpPred = BinOp->getOperand(0);
+ auto BinOpOp1 = BinOp->getOperand(1);
+ auto BinOpOp2 = BinOp->getOperand(2);
+
+ auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
+ if (!PredIntr ||
+ PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
+ return None;
+
+ auto PredOp = PredIntr->getOperand(0);
+ auto PredOpTy = cast<VectorType>(PredOp->getType());
+ if (PredOpTy != II.getType())
+ return None;
+
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ SmallVector<Value *> NarrowedBinOpArgs = {PredOp};
+ auto NarrowBinOpOp1 = Builder.CreateIntrinsic(
+ Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
+ NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
+ if (BinOpOp1 == BinOpOp2)
+ NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
+ else
+ NarrowedBinOpArgs.push_back(Builder.CreateIntrinsic(
+ Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
+
+ auto NarrowedBinOp =
+ Builder.CreateIntrinsic(IntrinsicID, {PredOpTy}, NarrowedBinOpArgs);
+ return IC.replaceInstUsesWith(II, NarrowedBinOp);
+}
+
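The correctness of the narrowing above can be modelled with plain bitmasks standing in for predicates (a sketch under the assumption that widening zeroes the new lanes and narrowing keeps the low ones; the lane counts are only an example):

#include <cassert>
#include <cstdint>

// Model a 4-lane predicate as the low 4 bits of a 16-lane svbool.
static uint16_t toSVBool(uint16_t P4) { return P4 & 0xF; }     // widen, new lanes zeroed
static uint16_t fromSVBool(uint16_t P16) { return P16 & 0xF; } // narrow, keep low lanes

int main() {
  uint16_t Pred = 0b1010;                  // governing predicate, 4 lanes
  uint16_t A = 0b1111000011110101, B = 0b0000111100001111;
  // Original shape: widen the predicate, AND at 16 lanes, then narrow.
  uint16_t Before = fromSVBool(toSVBool(Pred) & A & B);
  // Transformed shape: narrow the operands and AND at 4 lanes.
  uint16_t After = Pred & fromSVBool(A) & fromSVBool(B);
  assert(Before == After);
  return 0;
}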
static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
IntrinsicInst &II) {
// If the reinterpret instruction operand is a PHI Node
if (isa<PHINode>(II.getArgOperand(0)))
return processPhiNode(IC, II);
+ if (auto BinOpCombine = tryCombineFromSVBoolBinOp(IC, II))
+ return BinOpCombine;
+
SmallVector<Instruction *, 32> CandidatesForRemoval;
Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
@@ -1129,6 +1232,32 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return None;
}
+Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
+ APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const {
+ switch (II.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_fcvtxn:
+ case Intrinsic::aarch64_neon_rshrn:
+ case Intrinsic::aarch64_neon_sqrshrn:
+ case Intrinsic::aarch64_neon_sqrshrun:
+ case Intrinsic::aarch64_neon_sqshrn:
+ case Intrinsic::aarch64_neon_sqshrun:
+ case Intrinsic::aarch64_neon_sqxtn:
+ case Intrinsic::aarch64_neon_sqxtun:
+ case Intrinsic::aarch64_neon_uqrshrn:
+ case Intrinsic::aarch64_neon_uqshrn:
+ case Intrinsic::aarch64_neon_uqxtn:
+ SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
+ break;
+ }
+
+ return None;
+}
+
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
ArrayRef<const Value *> Args) {
@@ -1461,6 +1590,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
{ ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
+ // Bitcasts from float to integer
+ { ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0 },
+ { ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0 },
+ { ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0 },
+
+ // Bitcasts from integer to float
+ { ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0 },
+ { ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0 },
+ { ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0 },
};
if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
@@ -1555,9 +1693,12 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (!LT.second.isVector())
return 0;
- // The type may be split. Normalize the index to the new type.
- unsigned Width = LT.second.getVectorNumElements();
- Index = Index % Width;
+ // The type may be split. For fixed-width vectors we can normalize the
+ // index to the new type.
+ if (LT.second.isFixedLengthVector()) {
+ unsigned Width = LT.second.getVectorNumElements();
+ Index = Index % Width;
+ }
// The element at index zero is already inside the vector.
if (Index == 0)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c3e1735cd4cd..a6029b9f2445 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -106,6 +106,12 @@ public:
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const;
+
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
switch (K) {
case TargetTransformInfo::RGK_Scalar:
@@ -307,6 +313,10 @@ public:
return 2;
}
+ bool emitGetActiveLaneMask() const {
+ return ST->hasSVE();
+ }
+
bool supportsScalableVectors() const { return ST->hasSVE(); }
bool enableScalableVectorization() const { return ST->hasSVE(); }
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 62038b10fccd..33ed7ae9780e 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -3284,6 +3285,8 @@ static const struct Extension {
{"sme", {AArch64::FeatureSME}},
{"sme-f64", {AArch64::FeatureSMEF64}},
{"sme-i64", {AArch64::FeatureSMEI64}},
+ {"hbc", {AArch64::FeatureHBC}},
+ {"mops", {AArch64::FeatureMOPS}},
// FIXME: Unsupported extensions
{"lor", {}},
{"rdma", {}},
@@ -3307,12 +3310,16 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.6a";
else if (FBS[AArch64::HasV8_7aOps])
Str += "ARMv8.7a";
+ else if (FBS[AArch64::HasV8_8aOps])
+ Str += "ARMv8.8a";
else if (FBS[AArch64::HasV9_0aOps])
Str += "ARMv9-a";
else if (FBS[AArch64::HasV9_1aOps])
Str += "ARMv9.1a";
else if (FBS[AArch64::HasV9_2aOps])
Str += "ARMv9.2a";
+ else if (FBS[AArch64::HasV9_3aOps])
+ Str += "ARMv9.3a";
else if (FBS[AArch64::HasV8_0rOps])
Str += "ARMv8r";
else {
@@ -4531,7 +4538,7 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
Mnemonic = Head;
// Handle condition codes for a branch mnemonic
- if (Head == "b" && Next != StringRef::npos) {
+ if ((Head == "b" || Head == "bc") && Next != StringRef::npos) {
Start = Next;
Next = Name.find('.', Start + 1);
Head = Name.slice(Start + 1, Next);
@@ -4862,6 +4869,177 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
}
}
+ // Check v8.8-A memops instructions.
+ switch (Inst.getOpcode()) {
+ case AArch64::CPYFP:
+ case AArch64::CPYFPWN:
+ case AArch64::CPYFPRN:
+ case AArch64::CPYFPN:
+ case AArch64::CPYFPWT:
+ case AArch64::CPYFPWTWN:
+ case AArch64::CPYFPWTRN:
+ case AArch64::CPYFPWTN:
+ case AArch64::CPYFPRT:
+ case AArch64::CPYFPRTWN:
+ case AArch64::CPYFPRTRN:
+ case AArch64::CPYFPRTN:
+ case AArch64::CPYFPT:
+ case AArch64::CPYFPTWN:
+ case AArch64::CPYFPTRN:
+ case AArch64::CPYFPTN:
+ case AArch64::CPYFM:
+ case AArch64::CPYFMWN:
+ case AArch64::CPYFMRN:
+ case AArch64::CPYFMN:
+ case AArch64::CPYFMWT:
+ case AArch64::CPYFMWTWN:
+ case AArch64::CPYFMWTRN:
+ case AArch64::CPYFMWTN:
+ case AArch64::CPYFMRT:
+ case AArch64::CPYFMRTWN:
+ case AArch64::CPYFMRTRN:
+ case AArch64::CPYFMRTN:
+ case AArch64::CPYFMT:
+ case AArch64::CPYFMTWN:
+ case AArch64::CPYFMTRN:
+ case AArch64::CPYFMTN:
+ case AArch64::CPYFE:
+ case AArch64::CPYFEWN:
+ case AArch64::CPYFERN:
+ case AArch64::CPYFEN:
+ case AArch64::CPYFEWT:
+ case AArch64::CPYFEWTWN:
+ case AArch64::CPYFEWTRN:
+ case AArch64::CPYFEWTN:
+ case AArch64::CPYFERT:
+ case AArch64::CPYFERTWN:
+ case AArch64::CPYFERTRN:
+ case AArch64::CPYFERTN:
+ case AArch64::CPYFET:
+ case AArch64::CPYFETWN:
+ case AArch64::CPYFETRN:
+ case AArch64::CPYFETN:
+ case AArch64::CPYP:
+ case AArch64::CPYPWN:
+ case AArch64::CPYPRN:
+ case AArch64::CPYPN:
+ case AArch64::CPYPWT:
+ case AArch64::CPYPWTWN:
+ case AArch64::CPYPWTRN:
+ case AArch64::CPYPWTN:
+ case AArch64::CPYPRT:
+ case AArch64::CPYPRTWN:
+ case AArch64::CPYPRTRN:
+ case AArch64::CPYPRTN:
+ case AArch64::CPYPT:
+ case AArch64::CPYPTWN:
+ case AArch64::CPYPTRN:
+ case AArch64::CPYPTN:
+ case AArch64::CPYM:
+ case AArch64::CPYMWN:
+ case AArch64::CPYMRN:
+ case AArch64::CPYMN:
+ case AArch64::CPYMWT:
+ case AArch64::CPYMWTWN:
+ case AArch64::CPYMWTRN:
+ case AArch64::CPYMWTN:
+ case AArch64::CPYMRT:
+ case AArch64::CPYMRTWN:
+ case AArch64::CPYMRTRN:
+ case AArch64::CPYMRTN:
+ case AArch64::CPYMT:
+ case AArch64::CPYMTWN:
+ case AArch64::CPYMTRN:
+ case AArch64::CPYMTN:
+ case AArch64::CPYE:
+ case AArch64::CPYEWN:
+ case AArch64::CPYERN:
+ case AArch64::CPYEN:
+ case AArch64::CPYEWT:
+ case AArch64::CPYEWTWN:
+ case AArch64::CPYEWTRN:
+ case AArch64::CPYEWTN:
+ case AArch64::CPYERT:
+ case AArch64::CPYERTWN:
+ case AArch64::CPYERTRN:
+ case AArch64::CPYERTN:
+ case AArch64::CPYET:
+ case AArch64::CPYETWN:
+ case AArch64::CPYETRN:
+ case AArch64::CPYETN: {
+ unsigned Xd_wb = Inst.getOperand(0).getReg();
+ unsigned Xs_wb = Inst.getOperand(1).getReg();
+ unsigned Xn_wb = Inst.getOperand(2).getReg();
+ unsigned Xd = Inst.getOperand(3).getReg();
+ unsigned Xs = Inst.getOperand(4).getReg();
+ unsigned Xn = Inst.getOperand(5).getReg();
+ if (Xd_wb != Xd)
+ return Error(Loc[0],
+ "invalid CPY instruction, Xd_wb and Xd do not match");
+ if (Xs_wb != Xs)
+ return Error(Loc[0],
+ "invalid CPY instruction, Xs_wb and Xs do not match");
+ if (Xn_wb != Xn)
+ return Error(Loc[0],
+ "invalid CPY instruction, Xn_wb and Xn do not match");
+ if (Xd == Xs)
+ return Error(Loc[0], "invalid CPY instruction, destination and source"
+ " registers are the same");
+ if (Xd == Xn)
+ return Error(Loc[0], "invalid CPY instruction, destination and size"
+ " registers are the same");
+ if (Xs == Xn)
+ return Error(Loc[0], "invalid CPY instruction, source and size"
+ " registers are the same");
+ break;
+ }
+ case AArch64::SETP:
+ case AArch64::SETPT:
+ case AArch64::SETPN:
+ case AArch64::SETPTN:
+ case AArch64::SETM:
+ case AArch64::SETMT:
+ case AArch64::SETMN:
+ case AArch64::SETMTN:
+ case AArch64::SETE:
+ case AArch64::SETET:
+ case AArch64::SETEN:
+ case AArch64::SETETN:
+ case AArch64::SETGP:
+ case AArch64::SETGPT:
+ case AArch64::SETGPN:
+ case AArch64::SETGPTN:
+ case AArch64::SETGM:
+ case AArch64::SETGMT:
+ case AArch64::SETGMN:
+ case AArch64::SETGMTN:
+ case AArch64::MOPSSETGE:
+ case AArch64::MOPSSETGET:
+ case AArch64::MOPSSETGEN:
+ case AArch64::MOPSSETGETN: {
+ unsigned Xd_wb = Inst.getOperand(0).getReg();
+ unsigned Xn_wb = Inst.getOperand(1).getReg();
+ unsigned Xd = Inst.getOperand(2).getReg();
+ unsigned Xn = Inst.getOperand(3).getReg();
+ unsigned Xm = Inst.getOperand(4).getReg();
+ if (Xd_wb != Xd)
+ return Error(Loc[0],
+ "invalid SET instruction, Xd_wb and Xd do not match");
+ if (Xn_wb != Xn)
+ return Error(Loc[0],
+ "invalid SET instruction, Xn_wb and Xn do not match");
+ if (Xd == Xn)
+ return Error(Loc[0], "invalid SET instruction, destination and size"
+ " registers are the same");
+ if (Xd == Xm)
+ return Error(Loc[0], "invalid SET instruction, destination and source"
+ " registers are the same");
+ if (Xn == Xm)
+ return Error(Loc[0], "invalid SET instruction, source and size"
+ " registers are the same");
+ break;
+ }
+ }
// Now check immediate ranges. Separate from the above as there is overlap
// in the instructions being checked and this keeps the nested conditionals
@@ -5931,9 +6109,11 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV8_8A:
case AArch64::ArchKind::ARMV9A:
case AArch64::ArchKind::ARMV9_1A:
case AArch64::ArchKind::ARMV9_2A:
+ case AArch64::ArchKind::ARMV9_3A:
case AArch64::ArchKind::ARMV8R:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
@@ -5956,6 +6136,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV8_8A:
case AArch64::ArchKind::ARMV9A:
case AArch64::ArchKind::ARMV9_1A:
case AArch64::ArchKind::ARMV9_2A:
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 96d410e42be2..9ce00f76d9c7 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -238,6 +238,12 @@ static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder);
+static DecodeStatus DecodeSETMemOpInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder);
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -1842,3 +1848,52 @@ static DecodeStatus DecodeSVCROp(MCInst &Inst, unsigned Imm, uint64_t Address,
}
return Fail;
}
+
+static DecodeStatus DecodeCPYMemOpInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned Rs = fieldFromInstruction(insn, 16, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+
+ // None of the registers may alias: if they do, then the instruction is not
+ // merely unpredictable but actually entirely unallocated.
+ if (Rd == Rs || Rs == Rn || Rd == Rn)
+ return MCDisassembler::Fail;
+
+ // All three register operands are written back, so they all appear
+ // twice in the operand list, once as outputs and once as inputs.
+ if (!DecodeGPR64commonRegisterClass(Inst, Rd, Addr, Decoder) ||
+ !DecodeGPR64commonRegisterClass(Inst, Rs, Addr, Decoder) ||
+ !DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder) ||
+ !DecodeGPR64commonRegisterClass(Inst, Rd, Addr, Decoder) ||
+ !DecodeGPR64commonRegisterClass(Inst, Rs, Addr, Decoder) ||
+ !DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder))
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSETMemOpInstruction(MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned Rm = fieldFromInstruction(insn, 16, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+
+ // None of the registers may alias: if they do, then the instruction is not
+ // merely unpredictable but actually entirely unallocated.
+ if (Rd == Rm || Rm == Rn || Rd == Rn)
+ return MCDisassembler::Fail;
+
+ // Rd and Rn (not Rm) register operands are written back, so they appear
+ // twice in the operand list, once as outputs and once as inputs.
+ if (!DecodeGPR64commonRegisterClass(Inst, Rd, Addr, Decoder) ||
+ !DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder) ||
+ !DecodeGPR64commonRegisterClass(Inst, Rd, Addr, Decoder) ||
+ !DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder) ||
+ !DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder))
+ return MCDisassembler::Fail;
+
+ return MCDisassembler::Success;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index ac08ee8ae8dd..097b93e4fcca 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1112,6 +1112,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
}
+ Info.IsTailCall = CanTailCallOpt;
if (CanTailCallOpt)
return lowerTailCall(MIRBuilder, Info, OutArgs);
@@ -1179,7 +1180,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!determineAndHandleAssignments(
UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
MIRBuilder, Info.CallConv, Info.IsVarArg,
- UsingReturnedArg ? OutArgs[0].Regs[0] : Register()))
+ UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : None))
return false;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
index add0342c90fd..aafb1d19640a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -24,9 +24,7 @@ namespace llvm {
class AArch64TargetLowering;
class CCValAssign;
-class DataLayout;
class MachineIRBuilder;
-class MachineRegisterInfo;
class Type;
class AArch64CallLowering: public CallLowering {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3d9a626d3ac3..1f546ad50d57 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,7 +18,6 @@
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
-#include "AArch64GlobalISelUtils.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
@@ -472,8 +471,8 @@ private:
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
- : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI),
+ : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
+ RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
@@ -3937,19 +3936,19 @@ static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
// vector's elements.
switch (EltSize) {
case 8:
- CopyOpc = AArch64::CPYi8;
+ CopyOpc = AArch64::DUPi8;
ExtractSubReg = AArch64::bsub;
break;
case 16:
- CopyOpc = AArch64::CPYi16;
+ CopyOpc = AArch64::DUPi16;
ExtractSubReg = AArch64::hsub;
break;
case 32:
- CopyOpc = AArch64::CPYi32;
+ CopyOpc = AArch64::DUPi32;
ExtractSubReg = AArch64::ssub;
break;
case 64:
- CopyOpc = AArch64::CPYi64;
+ CopyOpc = AArch64::DUPi64;
ExtractSubReg = AArch64::dsub;
break;
default:
@@ -5469,8 +5468,8 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
// Insert the copy from LR/X30 into the entry block, before it can be
// clobbered by anything.
MFI.setReturnAddressIsTaken(true);
- MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
- AArch64::GPR64RegClass);
+ MFReturnAddr = getFunctionLiveInPhysReg(
+ MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
}
if (STI.hasPAuth()) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 35456d95dc2b..e2c46f4b4c1f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -21,7 +21,6 @@
namespace llvm {
-class LLVMContext;
class AArch64Subtarget;
/// This class provides the information for the target register banks.
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 7274ae79f74a..225e0c8e55fc 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -19,7 +19,6 @@
namespace llvm {
class MCStreamer;
-class Target;
class Triple;
struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 941226b83e44..66cb7a37a958 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -30,11 +30,7 @@ class MCStreamer;
class MCSubtargetInfo;
class MCTargetOptions;
class MCTargetStreamer;
-class StringRef;
class Target;
-class Triple;
-class raw_ostream;
-class raw_pwrite_stream;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index bb488cd7da32..574b22124957 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -334,6 +334,8 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
+def SDT_AArch64PFalse : SDTypeProfile<1, 0, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>]>;
+def AArch64pfalse : SDNode<"AArch64ISD::PFALSE", SDT_AArch64PFalse>;
let Predicates = [HasSVEorStreamingSVE] in {
defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
@@ -609,6 +611,15 @@ class sve_int_pfalse<bits<6> opc, string asm>
let isReMaterializable = 1;
}
+multiclass sve_int_pfalse<bits<6> opc, string asm> {
+ def NAME : sve_int_pfalse<opc, asm>;
+
+ def : Pat<(nxv16i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv8i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv4i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+ def : Pat<(nxv2i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+}
+
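The multiclass above is presumably instantiated elsewhere in the patch
(AArch64SVEInstrInfo.td) so that the new AArch64pfalse patterns are registered;
a hedged sketch of that instantiation, reusing the opcode bits and mnemonic of
the pre-existing PFALSE definition:

    defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;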
class sve_int_ptest<bits<6> opc, string asm>
: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
asm, "\t$Pg, $Pn",
@@ -1622,6 +1633,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
!cast<Instruction>(NAME), PTRUE_D>;
}
+multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op> :
+ sve_int_pred_log<opc, asm, op> {
+ def : Pat<(nxv16i1 (and nxv16i1:$Op1, nxv16i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+ def : Pat<(nxv8i1 (and nxv8i1:$Op1, nxv8i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+ def : Pat<(nxv4i1 (and nxv4i1:$Op1, nxv4i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+ def : Pat<(nxv2i1 (and nxv2i1:$Op1, nxv2i1:$Op2)),
+ (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Logical Mask Immediate Group
//===----------------------------------------------------------------------===//
@@ -1708,6 +1731,9 @@ multiclass sve_int_dup_mask_imm<string asm> {
(!cast<Instruction>(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>;
def : InstAlias<"mov $Zd, $imm",
(!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>;
+
+ def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))),
+ (!cast<Instruction>(NAME) logical_imm64:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -4641,6 +4667,10 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
(cmp $Op1, $Op2, $Op3)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
(cmp $Op1, $Op3, $Op2)>;
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
+ (cmp $Pg, $Op2, $Op3)>;
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
+ (cmp $Pg, $Op3, $Op2)>;
}
multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt,
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 642080a0d40d..4a24162540a5 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -40,10 +40,6 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
-namespace llvm {
-void initializeSVEIntrinsicOptsPass(PassRegistry &);
-}
-
namespace {
struct SVEIntrinsicOpts : public ModulePass {
static char ID; // Pass identification, replacement for typeid
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index caee2acd2606..5906a5d6b50b 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -483,18 +483,20 @@ inline unsigned getNumElementsFromSVEPredPattern(unsigned Pattern) {
}
/// Return specific VL predicate pattern based on the number of elements.
-inline unsigned getSVEPredPatternFromNumElements(unsigned MinNumElts) {
+inline Optional<unsigned>
+getSVEPredPatternFromNumElements(unsigned MinNumElts) {
switch (MinNumElts) {
default:
- llvm_unreachable("unexpected element count for SVE predicate");
+ return None;
case 1:
- return AArch64SVEPredPattern::vl1;
case 2:
- return AArch64SVEPredPattern::vl2;
+ case 3:
case 4:
- return AArch64SVEPredPattern::vl4;
+ case 5:
+ case 6:
+ case 7:
case 8:
- return AArch64SVEPredPattern::vl8;
+ return MinNumElts;
case 16:
return AArch64SVEPredPattern::vl16;
case 32:
@@ -757,7 +759,6 @@ namespace AArch64 {
// <n x (M*P) x t> vector (such as index 1) are undefined.
static constexpr unsigned SVEBitsPerBlock = 128;
static constexpr unsigned SVEMaxBitsPerVector = 2048;
-const unsigned NeonBitsPerVector = 128;
} // end namespace AArch64
} // end namespace llvm
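Since getSVEPredPatternFromNumElements now returns Optional<unsigned> instead
of asserting, callers have to handle the no-pattern case; a hypothetical caller
sketch (the wrapper function and the fallback choice are illustrative, not from
the patch):

    // Prefer a fixed-VL predicate pattern when one exists for the element
    // count; otherwise fall back to the generic 'all' pattern.
    unsigned choosePredPattern(unsigned MinNumElts) {
      if (Optional<unsigned> Pattern =
              getSVEPredPatternFromNumElements(MinNumElts))
        return *Pattern;                 // vl1..vl8, vl16, vl32, ... as before
      return AArch64SVEPredPattern::all; // e.g. MinNumElts == 9 has no pattern
    }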
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index e606f0e8fc3c..806c0b18637a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -610,12 +610,6 @@ def FeatureDsSrc2Insts : SubtargetFeature<"ds-src2-insts",
"Has ds_*_src2 instructions"
>;
-def FeatureRegisterBanking : SubtargetFeature<"register-banking",
- "HasRegisterBanking",
- "true",
- "Has register banking"
->;
-
def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
"HasVOP3Literal",
"true",
@@ -826,7 +820,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
- FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
+ FeatureNoSdstCMPX, FeatureVscnt,
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 22be014813b0..5ba9b2cd187e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -26,7 +26,7 @@ class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
const DataLayout &DL;
public:
- explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
+ explicit AMDGPUAAResult(const DataLayout &DL) : DL(DL) {}
AMDGPUAAResult(AMDGPUAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL) {}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 2f1e7823f65c..cd084fd5440a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -192,8 +192,20 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- if (!SPReg)
- SPReg = MIRBuilder.buildCopy(PtrTy, MFI->getStackPtrOffsetReg()).getReg(0);
+ if (!SPReg) {
+ const GCNSubtarget &ST = MIRBuilder.getMF().getSubtarget<GCNSubtarget>();
+ if (ST.enableFlatScratch()) {
+ // The stack is accessed unswizzled, so we can use a regular copy.
+ SPReg = MIRBuilder.buildCopy(PtrTy,
+ MFI->getStackPtrOffsetReg()).getReg(0);
+ } else {
+ // The address we produce here, without knowing the use context, is going
+ // to be interpreted as a vector address, so we need to convert to a
+ // swizzled address.
+ SPReg = MIRBuilder.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {PtrTy},
+ {MFI->getStackPtrOffsetReg()}).getReg(0);
+ }
+ }
auto OffsetReg = MIRBuilder.buildConstant(S32, Offset);
@@ -615,6 +627,13 @@ bool AMDGPUCallLowering::lowerFormalArguments(
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
+ // FIXME: This probably isn't defined for mesa
+ if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
+ Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+ MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(FlatScratchInitReg);
+ }
+
SmallVector<ArgInfo, 32> SplitArgs;
unsigned Idx = 0;
unsigned PSInputNum = 0;
@@ -879,13 +898,17 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
Register InputReg;
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
NeedWorkItemIDX) {
- InputReg = MRI.createGenericVirtualRegister(S32);
- LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
- std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
+ if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
+ InputReg = MRI.createGenericVirtualRegister(S32);
+ LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
+ std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
+ } else {
+ InputReg = MIRBuilder.buildConstant(S32, 0).getReg(0);
+ }
}
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
- NeedWorkItemIDY) {
+ NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
Register Y = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
std::get<2>(WorkitemIDY));
@@ -895,7 +918,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
}
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
- NeedWorkItemIDZ) {
+ NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
Register Z = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
std::get<2>(WorkitemIDZ));
@@ -904,16 +927,24 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
}
- if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
+ if (!InputReg &&
+ (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
InputReg = MRI.createGenericVirtualRegister(S32);
-
- // Workitem ids are already packed, any of present incoming arguments will
- // carry all required fields.
- ArgDescriptor IncomingArg = ArgDescriptor::createArg(
- IncomingArgX ? *IncomingArgX :
+ if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
+ // We're in a situation where the outgoing function requires the workitem
+      // ID, but the calling function does not have it (e.g. a graphics function
+ // calling a C calling convention function). This is illegal, but we need
+ // to produce something.
+ MIRBuilder.buildUndef(InputReg);
+ } else {
+      // Workitem ids are already packed; any of the present incoming arguments
+      // will carry all required fields.
+ ArgDescriptor IncomingArg = ArgDescriptor::createArg(
+ IncomingArgX ? *IncomingArgX :
IncomingArgY ? *IncomingArgY : *IncomingArgZ, ~0u);
- LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
- &AMDGPU::VGPR_32RegClass, S32);
+ LI->loadInputValue(InputReg, MIRBuilder, &IncomingArg,
+ &AMDGPU::VGPR_32RegClass, S32);
+ }
}
if (OutgoingArg->isRegister()) {
@@ -1314,6 +1345,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
}
+ Info.IsTailCall = CanTailCallOpt;
if (CanTailCallOpt)
return lowerTailCall(MIRBuilder, Info, OutArgs);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index a55729586b8d..1920684d8f1f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -150,13 +150,13 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
/// \returns The minimum number of bits needed to store the value of \Op as an
/// unsigned integer. Truncating to this size and then zero-extending to
- /// ScalarSize will not change the value.
- unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
+  /// the original size will not change the value.
+ unsigned numBitsUnsigned(Value *Op) const;
/// \returns The minimum number of bits needed to store the value of \Op as a
/// signed integer. Truncating to this size and then sign-extending to
- /// ScalarSize will not change the value.
- unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
+ /// the original size will not change the value.
+ unsigned numBitsSigned(Value *Op) const;
/// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
/// SelectionDAG has an issue where an and asserting the bits are known
@@ -445,17 +445,12 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
return true;
}
-unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op,
- unsigned ScalarSize) const {
- KnownBits Known = computeKnownBits(Op, *DL, 0, AC);
- return ScalarSize - Known.countMinLeadingZeros();
+unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op) const {
+ return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits();
}
-unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
- unsigned ScalarSize) const {
- // In order for this to be a signed 24-bit value, bit 23, must
- // be a sign bit.
- return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC) + 1;
+unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op) const {
+ return ComputeMaxSignificantBits(Op, *DL, 0, AC);
}
static void extractValues(IRBuilder<> &Builder,
@@ -532,12 +527,12 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
unsigned LHSBits = 0, RHSBits = 0;
bool IsSigned = false;
- if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&
- (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {
+ if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
+ (RHSBits = numBitsUnsigned(RHS)) <= 24) {
IsSigned = false;
- } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS, Size)) <= 24 &&
- (RHSBits = numBitsSigned(RHS, Size)) <= 24) {
+ } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
+ (RHSBits = numBitsSigned(RHS)) <= 24) {
IsSigned = true;
} else
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 699c6c479455..3ac7c45b3275 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -331,8 +331,7 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// FIXME: Should report this for all address spaces
- PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
- PtrTy->getElementType());
+ PointeeAlign = Arg.getParamAlign().valueOrOne();
}
}
@@ -731,10 +730,8 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
// FIXME: Need to distinguish in memory alignment from pointer alignment.
if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
- if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
- PtrTy->getElementType());
- }
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
+ PointeeAlign = Arg.getParamAlign().valueOrOne();
}
// There's no distinction between byval aggregates and raw aggregates.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 54177564afbc..b9d0655feef7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -51,7 +51,7 @@ unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
// In order for this to be a signed 24-bit value, bit 23, must
// be a sign bit.
- return DAG.ComputeMinSignedBits(Op);
+ return DAG.ComputeMaxSignificantBits(Op);
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
@@ -360,6 +360,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f16, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
@@ -1408,6 +1410,11 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
Start != 1)
return Op;
+ if (((SrcVT == MVT::v8f16 && VT == MVT::v4f16) ||
+ (SrcVT == MVT::v8i16 && VT == MVT::v4i16)) &&
+ (Start == 0 || Start == 4))
+ return Op;
+
DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
VT.getVectorNumElements());
@@ -4626,11 +4633,12 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
RHSKnown = RHSKnown.trunc(24);
if (Opc == AMDGPUISD::MUL_I24) {
- unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
- unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
- unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
- if (MaxValBits >= 32)
+ unsigned LHSValBits = LHSKnown.countMaxSignificantBits();
+ unsigned RHSValBits = RHSKnown.countMaxSignificantBits();
+ unsigned MaxValBits = LHSValBits + RHSValBits;
+ if (MaxValBits > 32)
break;
+ unsigned SignBits = 32 - MaxValBits + 1;
bool LHSNegative = LHSKnown.isNegative();
bool LHSNonNegative = LHSKnown.isNonNegative();
bool LHSPositive = LHSKnown.isStrictlyPositive();
@@ -4639,16 +4647,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
bool RHSPositive = RHSKnown.isStrictlyPositive();
if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
- Known.Zero.setHighBits(32 - MaxValBits);
+ Known.Zero.setHighBits(SignBits);
else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
- Known.One.setHighBits(32 - MaxValBits);
+ Known.One.setHighBits(SignBits);
} else {
- unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
- unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
- unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+ unsigned LHSValBits = LHSKnown.countMaxActiveBits();
+ unsigned RHSValBits = RHSKnown.countMaxActiveBits();
+ unsigned MaxValBits = LHSValBits + RHSValBits;
if (MaxValBits >= 32)
break;
- Known.Zero.setHighBits(32 - MaxValBits);
+ Known.Zero.setBitsFrom(MaxValBits);
}
break;
}
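A small worked example (illustrative arithmetic only, not from the patch) of
the signed MUL_I24 case above:

    // Inputs with 6 and 7 significant bits give a product with at most
    // 6 + 7 = 13 significant bits, so 32 - 13 + 1 = 20 high bits of the
    // i32 result are copies of the sign bit.
    constexpr unsigned LHSValBits = 6, RHSValBits = 7;
    constexpr unsigned MaxValBits = LHSValBits + RHSValBits; // 13 (<= 32)
    constexpr unsigned SignBits = 32 - MaxValBits + 1;       // 20
    static_assert(SignBits == 20, "high bits known equal to the sign bit");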
@@ -4904,7 +4912,8 @@ AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
}
}
-bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtactLegal(
+bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
- return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
+ return (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)) &&
+ Ty2 == LLT::scalar(32);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index daaca8737c5d..b41506157b68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -335,8 +335,8 @@ public:
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
- bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
- LLT Ty2) const override;
+ bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
+ LLT Ty2) const override;
};
namespace AMDGPUISD {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index db84b8766924..4f1d700bcd84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -58,24 +58,37 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
// Check if a value can be converted to a 16-bit value without losing
// precision.
-static bool canSafelyConvertTo16Bit(Value &V) {
+// The value is expected to be either a float (IsFloat = true) or an unsigned
+// integer (IsFloat = false).
+static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
Type *VTy = V.getType();
if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
// The value is already 16-bit, so we don't want to convert to 16-bit again!
return false;
}
- if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
- // We need to check that if we cast the index down to a half, we do not lose
- // precision.
- APFloat FloatValue(ConstFloat->getValueAPF());
- bool LosesInfo = true;
- FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
- return !LosesInfo;
+ if (IsFloat) {
+ if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
+ // We need to check that if we cast the index down to a half, we do not
+ // lose precision.
+ APFloat FloatValue(ConstFloat->getValueAPF());
+ bool LosesInfo = true;
+ FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
+ &LosesInfo);
+ return !LosesInfo;
+ }
+ } else {
+ if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
+ // We need to check that if we cast the index down to an i16, we do not
+ // lose precision.
+ APInt IntValue(ConstInt->getValue());
+ return IntValue.getActiveBits() <= 16;
+ }
}
+
Value *CastSrc;
- if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
- match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
- match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
+ bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
+ : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
+ if (IsExt) {
Type *CastSrcTy = CastSrc->getType();
if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
return true;
@@ -97,13 +110,116 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
llvm_unreachable("Should never be called!");
}
+/// Applies \p Func to the call's argument values and argument types, then
+/// replaces the intrinsic call \p II with an equivalent call to \p NewIntr
+/// built from the modified arguments.
+static Optional<Instruction *> modifyIntrinsicCall(
+ IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
+ std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
+ Func) {
+ SmallVector<Type *, 4> ArgTys;
+ if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+ return None;
+
+ SmallVector<Value *, 8> Args(II.args());
+
+ // Modify arguments and types
+ Func(Args, ArgTys);
+
+ Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
+
+ CallInst *NewCall = IC.Builder.CreateCall(I, Args);
+ NewCall->takeName(&II);
+ NewCall->copyMetadata(II);
+ if (isa<FPMathOperator>(NewCall))
+ NewCall->copyFastMathFlags(&II);
+
+ // Erase and replace uses
+ if (!II.getType()->isVoidTy())
+ IC.replaceInstUsesWith(II, NewCall);
+ return IC.eraseInstFromFunction(II);
+}
+
static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
IntrinsicInst &II, InstCombiner &IC) {
+ // Optimize _L to _LZ when _L is zero
+ if (const auto *LZMappingInfo =
+ AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (auto *ConstantLod =
+ dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
+ if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
+ ImageDimIntr->Dim);
+ return modifyIntrinsicCall(
+ II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ Args.erase(Args.begin() + ImageDimIntr->LodIndex);
+ });
+ }
+ }
+ }
+
+  // Optimize _mip away when 'lod' is zero
+ if (const auto *MIPMappingInfo =
+ AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (auto *ConstantMip =
+ dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
+ if (ConstantMip->isZero()) {
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
+ ImageDimIntr->Dim);
+ return modifyIntrinsicCall(
+ II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ Args.erase(Args.begin() + ImageDimIntr->MipIndex);
+ });
+ }
+ }
+ }
+
+ // Optimize _bias away when 'bias' is zero
+ if (const auto *BiasMappingInfo =
+ AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (auto *ConstantBias =
+ dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
+ if (ConstantBias->isZero()) {
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
+ ImageDimIntr->Dim);
+ return modifyIntrinsicCall(
+ II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
+ ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
+ });
+ }
+ }
+ }
+
+ // Optimize _offset away when 'offset' is zero
+ if (const auto *OffsetMappingInfo =
+ AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (auto *ConstantOffset =
+ dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
+ if (ConstantOffset->isZero()) {
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(
+ OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
+ return modifyIntrinsicCall(
+ II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
+ });
+ }
+ }
+ }
+
+ // Try to use A16 or G16
if (!ST->hasA16() && !ST->hasG16())
return None;
+ // Address is interpreted as float if the instruction has a sampler or as
+ // unsigned int if there is no sampler.
+ bool HasSampler =
+ AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
bool FloatCoord = false;
// true means derivatives can be converted to 16 bit, coordinates not
bool OnlyDerivatives = false;
@@ -112,7 +228,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
Value *Coord = II.getOperand(OperandIndex);
// If the values are not derived from 16-bit values, we cannot optimize.
- if (!canSafelyConvertTo16Bit(*Coord)) {
+ if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
if (OperandIndex < ImageDimIntr->CoordStart ||
ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
return None;
@@ -127,43 +243,50 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
FloatCoord = Coord->getType()->isFloatingPointTy();
}
- if (OnlyDerivatives) {
- if (!ST->hasG16())
- return None;
- } else {
- if (!ST->hasA16())
- OnlyDerivatives = true; // Only supports G16
+ if (!OnlyDerivatives && !ST->hasA16())
+ OnlyDerivatives = true; // Only supports G16
+
+ // Check if there is a bias parameter and if it can be converted to f16
+ if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
+ Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
+ assert(HasSampler &&
+ "Only image instructions with a sampler can have a bias");
+ if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
+ OnlyDerivatives = true;
}
+ if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
+ ImageDimIntr->CoordStart))
+ return None;
+
Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
: Type::getInt16Ty(II.getContext());
- SmallVector<Type *, 4> ArgTys;
- if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
- return None;
-
- ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
- if (!OnlyDerivatives)
- ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
- Function *I =
- Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
+ return modifyIntrinsicCall(
+ II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
+ ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
+ if (!OnlyDerivatives) {
+ ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
- SmallVector<Value *, 8> Args(II.args());
+ // Change the bias type
+ if (ImageDimIntr->NumBiasArgs != 0)
+ ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
+ }
- unsigned EndIndex =
- OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
- for (unsigned OperandIndex = ImageDimIntr->GradientStart;
- OperandIndex < EndIndex; OperandIndex++) {
- Args[OperandIndex] =
- convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
- }
+ unsigned EndIndex =
+ OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
+ for (unsigned OperandIndex = ImageDimIntr->GradientStart;
+ OperandIndex < EndIndex; OperandIndex++) {
+ Args[OperandIndex] =
+ convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
+ }
- CallInst *NewCall = IC.Builder.CreateCall(I, Args);
- NewCall->takeName(&II);
- NewCall->copyMetadata(II);
- if (isa<FPMathOperator>(NewCall))
- NewCall->copyFastMathFlags(&II);
- return IC.replaceInstUsesWith(II, NewCall);
+ // Convert the bias
+ if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
+ Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
+ Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
+ }
+ });
}
bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index b1263618c5db..e7ee36447682 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -20,9 +20,6 @@
namespace llvm {
class GCNSubtarget;
-class MachineFunction;
-class MachineInstr;
-class MachineInstrBuilder;
class MachineMemOperand;
class AMDGPUInstrInfo {
@@ -52,6 +49,9 @@ struct ImageDimIntrinsicInfo {
unsigned BaseOpcode;
MIMGDim Dim;
+ uint8_t NumOffsetArgs;
+ uint8_t NumBiasArgs;
+ uint8_t NumZCompareArgs;
uint8_t NumGradients;
uint8_t NumDmask;
uint8_t NumData;
@@ -60,6 +60,9 @@ struct ImageDimIntrinsicInfo {
uint8_t DMaskIndex;
uint8_t VAddrStart;
+ uint8_t OffsetIndex;
+ uint8_t BiasIndex;
+ uint8_t ZCompareIndex;
uint8_t GradientStart;
uint8_t CoordStart;
uint8_t LodIndex;
@@ -71,6 +74,7 @@ struct ImageDimIntrinsicInfo {
uint8_t TexFailCtrlIndex;
uint8_t CachePolicyIndex;
+ uint8_t BiasTyArg;
uint8_t GradientTyArg;
uint8_t CoordTyArg;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index e16bead81b65..b7d0f0580cda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -46,8 +46,7 @@ static cl::opt<bool> AllowRiskySelect(
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
const AMDGPUTargetMachine &TM)
- : InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
+ : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
STI(STI),
EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
@@ -1103,7 +1102,18 @@ bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
const DebugLoc &DL = I.getDebugLoc();
Register SrcReg = I.getOperand(2).getReg();
unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
+
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+ if (!ICmpInst::isIntPredicate(static_cast<ICmpInst::Predicate>(Pred))) {
+ MachineInstr *ICmp =
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
+
+ if (!RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
+ *TRI.getBoolRC(), *MRI))
+ return false;
+ I.eraseFromParent();
+ return true;
+ }
int Opcode = getV_CMPOpcode(Pred, Size);
if (Opcode == -1)
@@ -1234,7 +1244,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
// Get the return address reg and mark it as an implicit live-in
Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
- AMDGPU::SReg_64RegClass);
+ AMDGPU::SReg_64RegClass, DL);
BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
.addReg(LiveIn);
I.eraseFromParent();
@@ -1494,9 +1504,9 @@ static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
if (TexFailCtrl)
IsTexFail = true;
- TFE = (TexFailCtrl & 0x1) ? 1 : 0;
+ TFE = (TexFailCtrl & 0x1) ? true : false;
TexFailCtrl &= ~(uint64_t)0x1;
- LWE = (TexFailCtrl & 0x2) ? 1 : 0;
+ LWE = (TexFailCtrl & 0x2) ? true : false;
TexFailCtrl &= ~(uint64_t)0x2;
return TexFailCtrl == 0;
@@ -1511,10 +1521,6 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
- const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
- AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
- const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
- AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
@@ -1523,7 +1529,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
Register VDataIn, VDataOut;
LLT VDataTy;
int NumVDataDwords = -1;
- bool IsD16 = false;
+ bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
+ MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
bool Unorm;
if (!BaseOpcode->Sampler)
@@ -1572,16 +1579,6 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
- // One memoperand is mandatory, except for getresinfo.
- // FIXME: Check this in verifier.
- if (!MI.memoperands_empty()) {
- const MachineMemOperand *MMO = *MI.memoperands_begin();
-
- // Infer d16 from the memory size, as the register type will be mangled by
- // unpacked subtargets, or by TFE.
- IsD16 = ((8 * MMO->getSize()) / DMaskLanes) < 32;
- }
-
if (BaseOpcode->Store) {
VDataIn = MI.getOperand(1).getReg();
VDataTy = MRI->getType(VDataIn);
@@ -1596,26 +1593,6 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
}
- // Optimize _L to _LZ when _L is zero
- if (LZMappingInfo) {
- // The legalizer replaced the register with an immediate 0 if we need to
- // change the opcode.
- const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->LodIndex);
- if (Lod.isImm()) {
- assert(Lod.getImm() == 0);
- IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- }
- }
-
- // Optimize _mip away, when 'lod' is zero
- if (MIPMappingInfo) {
- const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->MipIndex);
- if (Lod.isImm()) {
- assert(Lod.getImm() == 0);
- IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
- }
- }
-
// Set G16 opcode
if (IsG16 && !IsA16) {
const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
@@ -2562,6 +2539,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
Register MaskReg = I.getOperand(2).getReg();
LLT Ty = MRI->getType(DstReg);
LLT MaskTy = MRI->getType(MaskReg);
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -2570,6 +2549,24 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
if (DstRB != SrcRB) // Should only happen for hand written MIR.
return false;
+ // Try to avoid emitting a bit operation when we only need to touch half of
+ // the 64-bit pointer.
+ APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
+ const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
+ const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
+
+ const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
+ const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
+
+ if (!IsVGPR && Ty.getSizeInBits() == 64 &&
+ !CanCopyLow32 && !CanCopyHi32) {
+ auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
+ .addReg(SrcReg)
+ .addReg(MaskReg);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ }
+
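For intuition on the CanCopyLow32/CanCopyHi32 checks above, an illustrative
constant (not from the patch): a mask whose low 32 bits are all ones leaves the
low pointer half untouched, so only the high half needs a real AND:

    constexpr unsigned long long Mask = 0xffff0000ffffffffULL;
    static_assert((Mask & 0xffffffffULL) == 0xffffffffULL,
                  "low half is a plain copy");
    static_assert(((Mask >> 32) & 0xffffffffULL) != 0xffffffffULL,
                  "high half still needs an AND");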
unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
const TargetRegisterClass &RegRC
= IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
@@ -2586,8 +2583,6 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
!RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
return false;
- MachineBasicBlock *BB = I.getParent();
- const DebugLoc &DL = I.getDebugLoc();
if (Ty.getSizeInBits() == 32) {
assert(MaskTy.getSizeInBits() == 32 &&
"ptrmask should have been narrowed during legalize");
@@ -2610,13 +2605,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
Register MaskedLo, MaskedHi;
- // Try to avoid emitting a bit operation when we only need to touch half of
- // the 64-bit pointer.
- APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
-
- const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
- const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
- if ((MaskOnes & MaskLo32) == MaskLo32) {
+ if (CanCopyLow32) {
// If all the bits in the low half are 1, we only need a copy for it.
MaskedLo = LoReg;
} else {
@@ -2631,7 +2620,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
.addReg(MaskLo);
}
- if ((MaskOnes & MaskHi32) == MaskHi32) {
+ if (CanCopyHi32) {
// If all the bits in the high half are 1, we only need a copy for it.
MaskedHi = HiReg;
} else {
@@ -3123,6 +3112,33 @@ bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const{
return true;
}
+bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (IsVALU) {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
+ .addImm(Subtarget->getWavefrontSizeLog2())
+ .addReg(SrcReg);
+ } else {
+ BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
+ .addReg(SrcReg)
+ .addImm(Subtarget->getWavefrontSizeLog2());
+ }
+
+ const TargetRegisterClass &RC =
+ IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
+ if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+}
+
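
selectWaveAddress above lowers G_AMDGPU_WAVE_ADDRESS to a right shift by log2 of the wavefront size, picking V_LSHRREV_B32 or S_LSHR_B32 from the destination register bank. A minimal arithmetic sketch of the shift itself (the wave64/wave32 shift amounts below are just the usual examples):

    #include <cassert>
    #include <cstdint>

    // Wave-level address = byte offset shifted right by log2(wavefront size).
    static uint32_t waveAddress(uint32_t ByteOffset, unsigned WavefrontSizeLog2) {
      return ByteOffset >> WavefrontSizeLog2;
    }

    int main() {
      assert(waveAddress(256, 6) == 4); // wave64
      assert(waveAddress(256, 5) == 8); // wave32
      return 0;
    }
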
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
@@ -3236,7 +3252,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_SHUFFLE_VECTOR:
return selectG_SHUFFLE_VECTOR(I);
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
- case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: {
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
const AMDGPU::ImageDimIntrinsicInfo *Intr
= AMDGPU::getImageDimIntrinsicInfo(I.getIntrinsicID());
assert(Intr && "not an image intrinsic with image pseudo");
@@ -3252,6 +3270,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_SI_CALL:
I.setDesc(TII.get(AMDGPU::SI_CALL));
return true;
+ case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
+ return selectWaveAddress(I);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -3896,20 +3916,59 @@ bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
return (LHSKnownZeros | *RHS).countTrailingOnes() >= ShAmtBits;
}
+// Return the wave level SGPR base address if this is a wave address.
+static Register getWaveAddress(const MachineInstr *Def) {
+ return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
+ ? Def->getOperand(1).getReg()
+ : Register();
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
- MachineInstr *MI = Root.getParent();
- MachineBasicBlock *MBB = MI->getParent();
+ Register Reg = Root.getReg();
+ const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+
+ const MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (Register WaveBase = getWaveAddress(Def)) {
+ return {{
+ [=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(Info->getScratchRSrcReg());
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ MIB.addReg(WaveBase);
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // offset
+ }};
+ }
int64_t Offset = 0;
+
+ // FIXME: Copy check is a hack
+ Register BasePtr;
+ if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset))))) {
+ if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ return {};
+ const MachineInstr *BasePtrDef = MRI->getVRegDef(BasePtr);
+ Register WaveBase = getWaveAddress(BasePtrDef);
+ if (!WaveBase)
+ return {};
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(Info->getScratchRSrcReg());
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ MIB.addReg(WaveBase);
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
+ }};
+ }
+
if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
return {};
- const MachineFunction *MF = MBB->getParent();
- const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
-
return {{
[=](MachineInstrBuilder &MIB) { // rsrc
MIB.addReg(Info->getScratchRSrcReg());
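
The rewritten selectMUBUFScratchOffset folds either a bare wave address or a wave address plus constant into the rsrc/soffset/offset operand triple. A value-level sketch of that decomposition, assuming for illustration a 12-bit unsigned immediate field (the real check is SIInstrInfo::isLegalMUBUFImmOffset, and the register numbers are placeholders):

    #include <cassert>
    #include <cstdint>
    #include <optional>

    struct MUBUFAddr { uint32_t SOffsetReg; int64_t ImmOffset; };

    // Assumption for this sketch only: immediates must fit an unsigned 12-bit field.
    static bool isLegalImmOffsetSketch(int64_t Imm) {
      return Imm >= 0 && Imm < (1 << 12);
    }

    // (wave_address Base) + Imm  becomes  soffset = Base, offset = Imm.
    static std::optional<MUBUFAddr> fold(uint32_t WaveBaseReg, int64_t Imm) {
      if (!isLegalImmOffsetSketch(Imm))
        return std::nullopt;           // selector returns {} and falls back
      return MUBUFAddr{WaveBaseReg, Imm};
    }

    int main() {
      assert(fold(/*WaveBaseReg=*/5, 16));
      assert(!fold(5, 1 << 13));       // too large for the immediate field
      return 0;
    }
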
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 26996e42af53..42095332d11a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -30,7 +30,6 @@ namespace AMDGPU {
struct ImageDimIntrinsicInfo;
}
-class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo;
class AMDGPUTargetMachine;
class BlockFrequencyInfo;
@@ -42,7 +41,6 @@ class MachineOperand;
class MachineRegisterInfo;
class RegisterBank;
class SIInstrInfo;
-class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;
@@ -147,6 +145,7 @@ private:
bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp,
MachineOperand &DataOp) const;
bool selectBVHIntrinsic(MachineInstr &I) const;
+ bool selectWaveAddress(MachineInstr &I) const;
std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
bool AllowAbs = true) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 0528b552f475..7d3dbfd7e851 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -18,6 +18,7 @@ class AddressSpacesImpl {
int Local = 3;
int Constant = 4;
int Private = 5;
+ int Constant32Bit = 6;
}
def AddrSpaces : AddressSpacesImpl;
@@ -405,18 +406,23 @@ class Aligned<int Bytes> {
int MinAlignment = Bytes;
}
-class StoreHi16<SDPatternOperator op> : PatFrag <
+class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
(ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
let IsStore = 1;
+ let MemoryVT = vt;
}
-def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
-def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
+def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
+ AddrSpaces.Constant32Bit ]>;
+def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
+ AddrSpaces.Constant,
+ AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;
-def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
- AddrSpaces.Global,
- AddrSpaces.Constant ]>;
+def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
+ AddrSpaces.Global,
+ AddrSpaces.Constant,
+ AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;
def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
@@ -522,9 +528,9 @@ def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
let MemoryVT = i16;
}
-def store_hi16_#as : StoreHi16 <truncstorei16>;
-def truncstorei8_hi16_#as : StoreHi16<truncstorei8>;
-def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;
+def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
+def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
+def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
defm atomic_store_#as : binary_atomic_op<atomic_store>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5046daaed977..04c6f67ed339 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -272,8 +272,8 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
const bool IsLoad = Query.Opcode != AMDGPU::G_STORE;
unsigned RegSize = Ty.getSizeInBits();
- unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
- unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
+ uint64_t MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ uint64_t AlignBits = Query.MMODescrs[0].AlignInBits;
unsigned AS = Query.Types[1].getAddressSpace();
// All of these need to be custom lowered to cast the pointer operand.
@@ -380,7 +380,7 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
/// access up to the alignment. Note this case when the memory access itself
/// changes, not the size of the result register.
static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
- unsigned AlignInBits, unsigned AddrSpace,
+ uint64_t AlignInBits, unsigned AddrSpace,
unsigned Opcode) {
unsigned SizeInBits = MemoryTy.getSizeInBits();
// We don't want to widen cases that are naturally legal.
@@ -929,10 +929,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_CTPOP)
.legalFor({{S32, S32}, {S32, S64}})
.clampScalar(0, S32, S32)
+ .widenScalarToNextPow2(1, 32)
.clampScalar(1, S32, S64)
.scalarize(0)
- .widenScalarToNextPow2(0, 32)
- .widenScalarToNextPow2(1, 32);
+ .widenScalarToNextPow2(0, 32);
+
// The hardware instructions return a different result on 0 than the generic
// instructions expect. The hardware produces -1, but these produce the
@@ -1172,7 +1173,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (MemSize > MaxSize)
return std::make_pair(0, LLT::scalar(MaxSize));
- unsigned Align = Query.MMODescrs[0].AlignInBits;
+ uint64_t Align = Query.MMODescrs[0].AlignInBits;
return std::make_pair(0, LLT::scalar(Align));
})
.fewerElementsIf(
@@ -1295,6 +1296,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.hasAtomicFaddInsts())
Atomic.legalFor({{S32, GlobalPtr}});
+ if (ST.hasGFX90AInsts()) {
+ // These are legal with some caveats, and should have undergone expansion in
+ // the IR in most situations
+ // TODO: Move atomic expansion into legalizer
+ // TODO: Also supports <2 x f16>
+ Atomic.legalFor({
+ {S32, GlobalPtr},
+ {S64, GlobalPtr},
+ {S64, FlatPtr}
+ });
+ }
+
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
// demarshalling
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
@@ -1345,8 +1358,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}, changeTo(1, S16));
Shifts.maxScalarIf(typeIs(0, S16), 1, S16);
Shifts.clampScalar(1, S32, S32);
- Shifts.clampScalar(0, S16, S64);
Shifts.widenScalarToNextPow2(0, 16);
+ Shifts.clampScalar(0, S16, S64);
getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT})
.minScalar(0, S16)
@@ -1357,8 +1370,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// expansion for the shifted type will produce much worse code if it hasn't
// been truncated already.
Shifts.clampScalar(1, S32, S32);
- Shifts.clampScalar(0, S32, S64);
Shifts.widenScalarToNextPow2(0, 32);
+ Shifts.clampScalar(0, S32, S64);
getActionDefinitionsBuilder({G_SSHLSAT, G_USHLSAT})
.minScalar(0, S32)
@@ -1812,6 +1825,27 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
return B.buildLoad(S32, LoadAddr, *MMO).getReg(0);
}
+/// Return true if the value is a known valid address, such that a null check is
+/// not necessary.
+static bool isKnownNonNull(Register Val, MachineRegisterInfo &MRI,
+ const AMDGPUTargetMachine &TM, unsigned AddrSpace) {
+ MachineInstr *Def = MRI.getVRegDef(Val);
+ switch (Def->getOpcode()) {
+ case AMDGPU::G_FRAME_INDEX:
+ case AMDGPU::G_GLOBAL_VALUE:
+ case AMDGPU::G_BLOCK_ADDR:
+ return true;
+ case AMDGPU::G_CONSTANT: {
+ const ConstantInt *CI = Def->getOperand(1).getCImm();
+ return CI->getSExtValue() != TM.getNullPointerValue(AddrSpace);
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1862,6 +1896,14 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
DestAS == AMDGPUAS::PRIVATE_ADDRESS);
+
+ if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
+ // Extract low 32-bits of the pointer.
+ B.buildExtract(Dst, Src, 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
unsigned NullVal = TM.getNullPointerValue(DestAS);
auto SegmentNull = B.buildConstant(DstTy, NullVal);
@@ -1884,24 +1926,29 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
if (!ST.hasFlatAddressSpace())
return false;
- auto SegmentNull =
- B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
- auto FlatNull =
- B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
-
Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
if (!ApertureReg.isValid())
return false;
- auto CmpRes =
- B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0));
-
// Coerce the type of the low half of the result so we can use merge_values.
Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
// TODO: Should we allow mismatched types but matching sizes in merges to
// avoid the ptrtoint?
auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg});
+
+ if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
+ B.buildCopy(Dst, BuildPtr);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
+ auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+
+ auto CmpRes =
+ B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0));
+
B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
MI.eraseFromParent();
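
The addrspacecast changes skip the compare-and-select against the segment null value when isKnownNonNull proves the source is a frame index, global, block address, or a constant other than the address space's null value. A standalone sketch of the flat-to-private direction, using 0 and all-ones as illustrative null encodings:

    #include <cassert>
    #include <cstdint>

    static uint32_t castFlatToPrivate(uint64_t Flat, bool KnownNonNull) {
      const uint64_t FlatNull = 0;                // illustrative null values only
      const uint32_t PrivateNull = ~0u;
      uint32_t Low = static_cast<uint32_t>(Flat); // buildExtract(Dst, Src, 0)
      if (KnownNonNull)
        return Low;                               // no compare, no select
      return Flat != FlatNull ? Low : PrivateNull;
    }

    int main() {
      assert(castFlatToPrivate(0x123456789abcull, /*KnownNonNull=*/true) == 0x56789abcu);
      assert(castFlatToPrivate(0, /*KnownNonNull=*/false) == ~0u);
      return 0;
    }
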
@@ -1959,6 +2006,7 @@ bool AMDGPULegalizerInfo::legalizeFceil(
// TODO: Should this propagate fast-math-flags?
B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
+ MI.eraseFromParent();
return true;
}
@@ -2213,10 +2261,12 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
LLT EltTy = VecTy.getElementType();
assert(EltTy == MRI.getType(Dst));
- if (IdxVal < VecTy.getNumElements())
- B.buildExtract(Dst, Vec, IdxVal * EltTy.getSizeInBits());
- else
+ if (IdxVal < VecTy.getNumElements()) {
+ auto Unmerge = B.buildUnmerge(EltTy, Vec);
+ B.buildCopy(Dst, Unmerge.getReg(IdxVal));
+ } else {
B.buildUndef(Dst);
+ }
MI.eraseFromParent();
return true;
@@ -2245,11 +2295,20 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
LLT VecTy = MRI.getType(Vec);
LLT EltTy = VecTy.getElementType();
assert(EltTy == MRI.getType(Ins));
+ (void)Ins;
- if (IdxVal < VecTy.getNumElements())
- B.buildInsert(Dst, Vec, Ins, IdxVal * EltTy.getSizeInBits());
- else
+ unsigned NumElts = VecTy.getNumElements();
+ if (IdxVal < NumElts) {
+ SmallVector<Register, 8> SrcRegs;
+ for (unsigned i = 0; i < NumElts; ++i)
+ SrcRegs.push_back(MRI.createGenericVirtualRegister(EltTy));
+ B.buildUnmerge(SrcRegs, Vec);
+
+ SrcRegs[IdxVal] = MI.getOperand(2).getReg();
+ B.buildMerge(Dst, SrcRegs);
+ } else {
B.buildUndef(Dst);
+ }
MI.eraseFromParent();
return true;
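
legalizeInsertVectorElt now unmerges the vector into its elements, overwrites the constant-indexed slot, and merges the pieces back, instead of building a G_INSERT at a bit offset. A value-level sketch:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Unmerge + overwrite + merge; an out-of-range index yields undef in the
    // real lowering, modelled here by returning the input unchanged.
    static std::vector<uint32_t> insertElt(std::vector<uint32_t> Vec,
                                           uint32_t Ins, unsigned Idx) {
      if (Idx < Vec.size())
        Vec[Idx] = Ins;
      return Vec;
    }

    int main() {
      auto R = insertElt({1, 2, 3, 4}, 9, 2);
      assert(R[0] == 1 && R[2] == 9 && R[3] == 4);
      return 0;
    }
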
@@ -2502,7 +2561,7 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
const LLT MemTy = MMO->getMemoryType();
const Align MemAlign = MMO->getAlign();
const unsigned MemSize = MemTy.getSizeInBits();
- const unsigned AlignInBits = 8 * MemAlign.value();
+ const uint64_t AlignInBits = 8 * MemAlign.value();
// Widen non-power-of-2 loads to the alignment if needed
if (shouldWidenLoad(ST, MemTy, AlignInBits, AddrSpace, MI.getOpcode())) {
@@ -2832,8 +2891,8 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
assert(Register::isPhysicalRegister(SrcReg) && "Physical register expected");
assert(DstReg.isVirtual() && "Virtual register expected");
- Register LiveIn = getFunctionLiveInPhysReg(B.getMF(), B.getTII(), SrcReg, *ArgRC,
- ArgTy);
+ Register LiveIn = getFunctionLiveInPhysReg(B.getMF(), B.getTII(), SrcReg,
+ *ArgRC, B.getDebugLoc(), ArgTy);
if (Arg->isMasked()) {
// TODO: Should we try to emit this once in the entry block?
const LLT S32 = LLT::scalar(32);
@@ -2842,6 +2901,8 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
Register AndMaskSrc = LiveIn;
+ // TODO: Avoid clearing the high bits if we know workitem id y/z are always
+ // 0.
if (Shift != 0) {
auto ShiftAmt = B.buildConstant(S32, Shift);
AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0);
@@ -4106,7 +4167,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP;
- case Intrinsic::amdgcn_buffer_atomic_fadd:
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD;
@@ -4213,15 +4273,18 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI,
if ((I < Intr->GradientStart) ||
(I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) ||
(I >= Intr->CoordStart && !IsA16)) {
- // Handle any gradient or coordinate operands that should not be packed
if ((I < Intr->GradientStart) && IsA16 &&
(B.getMRI()->getType(AddrReg) == S16)) {
+ assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");
// Special handling of bias when A16 is on. Bias is of type half but
// occupies full 32-bit.
PackedAddrs.push_back(
B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)})
.getReg(0));
} else {
+ assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) &&
+ "Bias needs to be converted to 16 bit in A16 mode");
+ // Handle any gradient or coordinate operands that should not be packed
AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
PackedAddrs.push_back(AddrReg);
}
@@ -4320,6 +4383,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
const LLT V2S16 = LLT::fixed_vector(2, 16);
unsigned DMask = 0;
+ Register VData = MI.getOperand(NumDefs == 0 ? 1 : 0).getReg();
+ LLT Ty = MRI->getType(VData);
// Check for 16 bit addresses and pack if true.
LLT GradTy =
@@ -4328,6 +4393,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg());
const bool IsG16 = GradTy == S16;
const bool IsA16 = AddrTy == S16;
+ const bool IsD16 = Ty.getScalarType() == S16;
int DMaskLanes = 0;
if (!BaseOpcode->Atomic) {
@@ -4347,8 +4413,11 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
Observer.changingInstr(MI);
auto ChangedInstr = make_scope_exit([&] { Observer.changedInstr(MI); });
- unsigned NewOpcode = NumDefs == 0 ?
- AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD;
+ const unsigned StoreOpcode = IsD16 ? AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16
+ : AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE;
+ const unsigned LoadOpcode = IsD16 ? AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16
+ : AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD;
+ unsigned NewOpcode = NumDefs == 0 ? StoreOpcode : LoadOpcode;
// Track that we legalized this
MI.setDesc(B.getTII().get(NewOpcode));
@@ -4381,44 +4450,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
unsigned CorrectedNumVAddrs = Intr->NumVAddrs;
- // Optimize _L to _LZ when _L is zero
- if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
- AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) {
- const ConstantFP *ConstantLod;
-
- if (mi_match(MI.getOperand(ArgOffset + Intr->LodIndex).getReg(), *MRI,
- m_GFCst(ConstantLod))) {
- if (ConstantLod->isZero() || ConstantLod->isNegative()) {
- // Set new opcode to _lz variant of _l, and change the intrinsic ID.
- const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
- AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
- Intr->Dim);
-
- // The starting indexes should remain in the same place.
- --CorrectedNumVAddrs;
-
- MI.getOperand(MI.getNumExplicitDefs())
- .setIntrinsicID(static_cast<Intrinsic::ID>(NewImageDimIntr->Intr));
- MI.RemoveOperand(ArgOffset + Intr->LodIndex);
- Intr = NewImageDimIntr;
- }
- }
- }
-
- // Optimize _mip away, when 'lod' is zero
- if (AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode)) {
- int64_t ConstantLod;
- if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI,
- m_ICst(ConstantLod))) {
- if (ConstantLod == 0) {
- // TODO: Change intrinsic opcode and remove operand instead or replacing
- // it with 0, as the _L to _LZ handling is done above.
- MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0);
- --CorrectedNumVAddrs;
- }
- }
- }
-
// Rewrite the addressing register layout before doing anything else.
if (BaseOpcode->Gradients && !ST.hasG16() && (IsA16 != IsG16)) {
// 16 bit gradients are supported, but are tied to the A16 control
@@ -4494,9 +4525,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
if (BaseOpcode->Store) { // No TFE for stores?
// TODO: Handle dmask trim
- Register VData = MI.getOperand(1).getReg();
- LLT Ty = MRI->getType(VData);
- if (!Ty.isVector() || Ty.getElementType() != S16)
+ if (!Ty.isVector() || !IsD16)
return true;
Register RepackedReg = handleD16VData(B, *MRI, VData, true);
@@ -4508,9 +4537,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
}
Register DstReg = MI.getOperand(0).getReg();
- LLT Ty = MRI->getType(DstReg);
const LLT EltTy = Ty.getScalarType();
- const bool IsD16 = Ty.getScalarType() == S16;
const int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
// Confirm that the return type is large enough for the dmask specified
@@ -4918,6 +4945,12 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return true;
}
+static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI, int64_t C) {
+ B.buildConstant(MI.getOperand(0).getReg(), C);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
@@ -5021,12 +5054,20 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_implicitarg_ptr:
return legalizeImplicitArgPtr(MI, MRI, B);
case Intrinsic::amdgcn_workitem_id_x:
+ if (ST.getMaxWorkitemID(B.getMF().getFunction(), 0) == 0)
+ return replaceWithConstant(B, MI, 0);
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKITEM_ID_X);
case Intrinsic::amdgcn_workitem_id_y:
+ if (ST.getMaxWorkitemID(B.getMF().getFunction(), 1) == 0)
+ return replaceWithConstant(B, MI, 0);
+
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
case Intrinsic::amdgcn_workitem_id_z:
+ if (ST.getMaxWorkitemID(B.getMF().getFunction(), 2) == 0)
+ return replaceWithConstant(B, MI, 0);
+
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
case Intrinsic::amdgcn_workgroup_id_x:
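
The workitem-id cases above short-circuit to a constant zero whenever the subtarget can prove the dispatch has only one workitem along that dimension (getMaxWorkitemID returning 0). A minimal sketch of the decision:

    #include <cassert>
    #include <cstdint>

    // If the maximum workitem id in a dimension is 0, fold the intrinsic to 0
    // instead of reading the preloaded argument register.
    static int64_t legalizeWorkitemId(unsigned MaxWorkitemId, int64_t Preloaded) {
      return MaxWorkitemId == 0 ? 0 : Preloaded;
    }

    int main() {
      assert(legalizeWorkitemId(0, 37) == 0);
      assert(legalizeWorkitemId(255, 37) == 37);
      return 0;
    }
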
@@ -5105,16 +5146,29 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
- case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
- case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
- case Intrinsic::amdgcn_buffer_atomic_fadd:
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
return legalizeBufferAtomic(MI, B, IntrID);
+ case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_struct_buffer_atomic_fadd: {
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!MRI.use_empty(DstReg) && !ST.hasGFX90AInsts()) {
+ Function &F = B.getMF().getFunction();
+ DiagnosticInfoUnsupported NoFpRet(
+ F, "return versions of fp atomics not supported", B.getDebugLoc(),
+ DS_Error);
+ F.getContext().diagnose(NoFpRet);
+ B.buildUndef(DstReg);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return legalizeBufferAtomic(MI, B, IntrID);
+ }
case Intrinsic::amdgcn_atomic_inc:
return legalizeAtomicIncDec(MI, B, true);
case Intrinsic::amdgcn_atomic_dec:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 7faf0436f995..964a41d3d740 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -21,7 +21,6 @@
namespace llvm {
class GCNTargetMachine;
-class LLVMContext;
class GCNSubtarget;
class MachineIRBuilder;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 49cf6db5197f..c28427758ac7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -58,9 +58,6 @@ private:
// "FuncName" exists. It may create a new function prototype in pre-link mode.
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
- // Replace a normal function with its native version.
- bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
-
bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
@@ -90,24 +87,6 @@ private:
double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
- // exp
- bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
- // exp2
- bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
- // exp10
- bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
- // log
- bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
- // log2
- bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
- // log10
- bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
// sqrt
bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
@@ -623,7 +602,8 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
Function *Callee = CI->getCalledFunction();
// Ignore indirect calls.
- if (Callee == 0) return false;
+ if (Callee == nullptr)
+ return false;
BasicBlock *BB = CI->getParent();
LLVMContext &Context = CI->getParent()->getContext();
@@ -778,27 +758,6 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
return false;
}
-bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
- Module *M = CI->getModule();
- if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
- FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
- !HasNative(FInfo.getId()))
- return false;
-
- AMDGPULibFunc nf = FInfo;
- nf.setPrefix(AMDGPULibFunc::NATIVE);
- if (FunctionCallee FPExpr = getFunction(M, nf)) {
- LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
-
- CI->setCalledFunction(FPExpr);
-
- LLVM_DEBUG(dbgs() << *CI << '\n');
-
- return true;
- }
- return false;
-}
-
// [native_]half_recip(c) ==> 1.0/c
bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
const FuncInfo &FInfo) {
@@ -1402,8 +1361,8 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
Function *UCallee = UI->getCalledFunction();
Type *RetType = UCallee->getReturnType();
B.SetInsertPoint(&*ItNew);
- AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
- std::string(prefix) + UI->getName());
+ AllocaInst *Alloc =
+ B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
Alloc->setAlignment(
Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
return Alloc;
@@ -1724,7 +1683,8 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
// Ignore indirect calls.
Function *Callee = CI->getCalledFunction();
- if (Callee == 0) continue;
+ if (Callee == nullptr)
+ continue;
LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
dbgs().flush());
@@ -1757,7 +1717,7 @@ PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
// Ignore indirect calls.
Function *Callee = CI->getCalledFunction();
- if (Callee == 0)
+ if (Callee == nullptr)
continue;
LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
@@ -1783,9 +1743,10 @@ bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
// Ignore indirect calls.
Function *Callee = CI->getCalledFunction();
- if (Callee == 0) continue;
+ if (Callee == nullptr)
+ continue;
- if(Simplifier.useNative(CI))
+ if (Simplifier.useNative(CI))
Changed = true;
}
}
@@ -1811,7 +1772,7 @@ PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
// Ignore indirect calls.
Function *Callee = CI->getCalledFunction();
- if (Callee == 0)
+ if (Callee == nullptr)
continue;
if (Simplifier.useNative(CI))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
index c97223b047e8..dc0ac72016f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -10,6 +10,7 @@
#define _AMDGPU_LIBFUNC_H_
#include "llvm/ADT/StringRef.h"
+#include <memory>
namespace llvm {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 0c743a77092c..593388a4d819 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -15,9 +15,8 @@
using namespace llvm;
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
- : MachineFunctionInfo(), Mode(MF.getFunction()),
- IsEntryFunction(
- AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
+ : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC(
+ MF.getFunction().getCallingConv())),
IsModuleEntryFunction(
AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 10ff50040c6a..48cf46b5f871 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -15,8 +15,6 @@
namespace llvm {
-class GCNSubtarget;
-
class AMDGPUMachineFunction : public MachineFunctionInfo {
/// A map to keep track of local memory objects and their offsets within the
/// local memory space.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
index 8af7979dba8b..5cefc83e25e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -29,4 +29,4 @@ const char NoteNameV3[] = "AMDGPU";
} // End namespace ElfNote
} // End namespace AMDGPU
} // End namespace llvm
-#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNOTETYPE_H
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 7c4eb71882c7..f91f31508ad2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -463,7 +463,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
WhatToStore.push_back(Arg);
}
} else if (isa<FixedVectorType>(ArgType)) {
- Type *IType = NULL;
+ Type *IType = nullptr;
uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
uint32_t EleSize = ArgType->getScalarSizeInBits();
uint32_t TotalSize = EleCount * EleSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index f9a9fe403ff6..2d8126a49327 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -789,6 +789,17 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
Align Alignment =
DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
uint64_t AllocSize = DL.getTypeAllocSize(GV->getValueType());
+
+ // HIP uses an extern unsized array in local address space for dynamically
+ // allocated shared memory. In that case, we have to disable the promotion.
+ if (GV->hasExternalLinkage() && AllocSize == 0) {
+ LocalMemLimit = 0;
+ LLVM_DEBUG(dbgs() << "Function has a reference to externally allocated "
+ "local memory. Promoting to local memory "
+ "disabled.\n");
+ return false;
+ }
+
AllocatedSizes.emplace_back(AllocSize, Alignment);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 3ce67a733c10..0df6f4d45b06 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -36,6 +36,7 @@ protected:
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
+ const GCNSubtarget &Subtarget;
const RegisterBankInfo &RBI;
const TargetRegisterInfo &TRI;
const SIInstrInfo &TII;
@@ -44,9 +45,9 @@ protected:
public:
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()),
- RBI(*MF.getSubtarget().getRegBankInfo()),
- TRI(*MF.getSubtarget().getRegisterInfo()),
- TII(*MF.getSubtarget<GCNSubtarget>().getInstrInfo()), Helper(Helper){};
+ Subtarget(MF.getSubtarget<GCNSubtarget>()),
+ RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()),
+ TII(*Subtarget.getInstrInfo()), Helper(Helper){};
bool isVgprRegBank(Register Reg);
Register getAsVgpr(Register Reg);
@@ -193,7 +194,10 @@ bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
MachineInstr &MI, Med3MatchInfo &MatchInfo) {
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
- if (Ty != LLT::scalar(16) && Ty != LLT::scalar(32))
+
+ // med3 for f16 is only available on gfx9+, and not available for v2f16.
+ if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
+ Ty != LLT::scalar(32))
return false;
auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index c60012bcfe2e..de2dccef804a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -718,8 +718,11 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
const unsigned WaveAndOpc = Subtarget.isWave32() ?
AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
- const unsigned MovTermOpc = Subtarget.isWave32() ?
- AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
+ const unsigned MovExecOpc =
+ Subtarget.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ const unsigned MovExecTermOpc =
+ Subtarget.isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
+
const unsigned XorTermOpc = Subtarget.isWave32() ?
AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
const unsigned AndSaveExecOpc = Subtarget.isWave32() ?
@@ -996,12 +999,12 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
// Save the EXEC mask before the loop.
- BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg)
+ BuildMI(MBB, MBB.end(), DL, TII->get(MovExecOpc), SaveExecReg)
.addReg(ExecReg);
// Restore the EXEC mask after the loop.
B.setMBB(*RestoreExecBB);
- B.buildInstr(MovTermOpc)
+ B.buildInstr(MovExecTermOpc)
.addDef(ExecReg)
.addReg(SaveExecReg);
@@ -2953,7 +2956,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break;
}
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
- case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: {
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
const AMDGPU::RsrcIntrinsic *RSrcIntrin
= AMDGPU::lookupRsrcIntrinsic(MI.getIntrinsicID());
assert(RSrcIntrin && RSrcIntrin->IsImage);
@@ -3691,6 +3696,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
break;
}
+ case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
+ // This case is weird because we expect a physical register in the source,
+ // but need to set a bank anyway.
+ //
+ // We could select the result to SGPR or VGPR, but for the one current use
+ // it's more practical to always use VGPR.
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
+ break;
+ }
case AMDGPU::G_INSERT: {
unsigned BankID = getMappingType(MRI, MI);
unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
@@ -4078,7 +4093,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mqsad_pk_u16_u8:
case Intrinsic::amdgcn_mqsad_u32_u8:
case Intrinsic::amdgcn_cvt_pk_u8_f32:
- case Intrinsic::amdgcn_alignbit:
case Intrinsic::amdgcn_alignbyte:
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_fdot2:
@@ -4276,7 +4290,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
- case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: {
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
+ case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
auto IntrID = MI.getIntrinsicID();
const AMDGPU::RsrcIntrinsic *RSrcIntrin = AMDGPU::lookupRsrcIntrinsic(IntrID);
assert(RSrcIntrin && "missing RsrcIntrinsic for image intrinsic");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 45f7c2f369bd..1c6c63dd5b25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -353,7 +353,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
// off any return attributes, e.g. zeroext doesn't make sense with a struct.
NewFunc->stealArgumentListFrom(F);
- AttrBuilder RetAttrs;
+ AttributeMask RetAttrs;
RetAttrs.addAttribute(Attribute::SExt);
RetAttrs.addAttribute(Attribute::ZExt);
RetAttrs.addAttribute(Attribute::NoAlias);
@@ -433,7 +433,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
PointerType *ArgType = cast<PointerType>(Arg.getType());
- auto *EltTy = ArgType->getElementType();
+ auto *EltTy = ArgType->getPointerElementType();
const auto Align =
DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index cd05797fdbdb..e82f9232b114 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -269,7 +269,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasGetWaveIdInst(false),
HasSMemTimeInst(false),
HasShaderCyclesRegister(false),
- HasRegisterBanking(false),
HasVOP3Literal(false),
HasNoDataDepHazard(false),
FlatAddressSpace(false),
@@ -772,11 +771,11 @@ unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
}
unsigned
-GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const {
+GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
- if (HasFlatScratchInit || HasArchitectedFlatScratch) {
+ if (HasFlatScratch || HasArchitectedFlatScratch) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
@@ -794,20 +793,11 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
}
unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
- // The logic to detect if the function has
- // flat scratch init is slightly different than how
- // SIMachineFunctionInfo constructor derives.
- // We don't use amdgpu-calls, amdgpu-stack-objects
- // attributes and isAmdHsaOrMesa here as it doesn't really matter.
- // TODO: Outline this derivation logic and have just
- // one common function in the backend to avoid duplication.
- bool isEntry = AMDGPU::isEntryFunctionCC(F.getCallingConv());
- bool FunctionHasFlatScratchInit = false;
- if (hasFlatAddressSpace() && isEntry && !flatScratchIsArchitected() &&
- enableFlatScratch()) {
- FunctionHasFlatScratchInit = true;
- }
- return getBaseReservedNumSGPRs(FunctionHasFlatScratchInit);
+ // In principle we do not need to reserve SGPR pair used for flat_scratch if
+ // we know flat instructions do not access the stack anywhere in the
+ // program. For now assume it's needed if we have flat instructions.
+ const bool KernelUsesFlatScratch = hasFlatAddressSpace();
+ return getBaseReservedNumSGPRs(KernelUsesFlatScratch);
}
unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 88ed4b2b7a24..7f1b94be4ffe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -212,7 +212,19 @@ public:
/// Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const Function &F) const {
- return isAmdHsaOrMesa(F) ? 0 : 36;
+ switch (TargetTriple.getOS()) {
+ case Triple::AMDHSA:
+ case Triple::AMDPAL:
+ case Triple::Mesa3D:
+ return 0;
+ case Triple::UnknownOS:
+ default:
+ // For legacy reasons unknown/other is treated as a different version of
+ // mesa.
+ return 36;
+ }
+
+ llvm_unreachable("invalid triple OS");
}
/// \returns Maximum number of work groups per compute unit supported by the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 226646a96953..dd3676f3b707 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -21,8 +21,6 @@
namespace llvm {
-class ScheduleDAGMILive;
-
//===----------------------------------------------------------------------===//
// AMDGPU Target Machine (R600+)
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 09c5eb192e1f..a8df7789c8a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -844,15 +844,8 @@ bool GCNTTIImpl::isInlineAsmSourceOfDivergence(
TLI->ComputeConstraintToUse(TC, SDValue());
- Register AssignedReg;
- const TargetRegisterClass *RC;
- std::tie(AssignedReg, RC) = TLI->getRegForInlineAsmConstraint(
- TRI, TC.ConstraintCode, TC.ConstraintVT);
- if (AssignedReg) {
- // FIXME: This is a workaround for getRegForInlineAsmConstraint
- // returning VS_32
- RC = TRI->getPhysRegClass(AssignedReg);
- }
+ const TargetRegisterClass *RC = TLI->getRegForInlineAsmConstraint(
+ TRI, TC.ConstraintCode, TC.ConstraintVT).second;
// For AGPR constraints null is returned on subtargets without AGPRs, so
// assume divergent for null.
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2bb59086f391..c1c88d9a7462 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -62,7 +62,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
public:
AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
- : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
+ : Kind(Kind_), AsmParser(AsmParser_) {}
using Ptr = std::unique_ptr<AMDGPUOperand>;
@@ -1548,6 +1548,7 @@ private:
bool validateVccOperand(unsigned Reg) const;
bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
+ bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
@@ -3613,6 +3614,40 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
return true;
}
+bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
+ const OperandVector &Operands) {
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
+ return true;
+
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx == -1)
+ return true;
+
+ const MCOperand &Src2 = Inst.getOperand(Src2Idx);
+ if (!Src2.isReg())
+ return true;
+
+ MCRegister Src2Reg = Src2.getReg();
+ MCRegister DstReg = Inst.getOperand(0).getReg();
+ if (Src2Reg == DstReg)
+ return true;
+
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
+ return true;
+
+ if (isRegIntersect(Src2Reg, DstReg, TRI)) {
+ Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
+ "source 2 operand must not partially overlap with dst");
+ return false;
+ }
+
+ return true;
+}
+
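
validateMFMA above rejects assembly where src2 of an MFMA with an accumulator wider than 128 bits partially overlaps the destination; an exact match or a fully disjoint register range is accepted. A register-range sketch using a hypothetical flat register numbering:

    #include <cassert>

    struct RegRange { unsigned First, Count; }; // hypothetical flat numbering

    static bool intersects(RegRange A, RegRange B) {
      return A.First < B.First + B.Count && B.First < A.First + A.Count;
    }

    static bool isValidMFMASrc2(RegRange Src2, RegRange Dst, unsigned DstBits) {
      if (Src2.First == Dst.First && Src2.Count == Dst.Count)
        return true;                 // accumulating into dst itself is fine
      if (DstBits <= 128)
        return true;                 // narrow results are not checked
      return !intersects(Src2, Dst); // partial overlap is diagnosed
    }

    int main() {
      assert(isValidMFMASrc2({0, 8}, {0, 8}, 256));  // same accumulator
      assert(!isValidMFMASrc2({4, 8}, {0, 8}, 256)); // partial overlap: error
      assert(isValidMFMASrc2({8, 8}, {0, 8}, 256));  // disjoint
      return 0;
    }
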
bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
switch (Inst.getOpcode()) {
default:
@@ -4297,6 +4332,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMAIAccWrite(Inst, Operands)) {
return false;
}
+ if (!validateMFMA(Inst, Operands)) {
+ return false;
+ }
if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
return false;
}
@@ -4568,7 +4606,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
uint64_t AccumOffset = 0;
SMRange SGPRRange;
uint64_t NextFreeSGPR = 0;
- unsigned UserSGPRCount = 0;
+
+ // Count the number of user SGPRs implied from the enabled feature bits.
+ unsigned ImpliedUserSGPRCount = 0;
+
+ // Track if the asm explicitly contains the directive for the user SGPR
+ // count.
+ Optional<unsigned> ExplicitUserSGPRCount;
bool ReserveVCC = true;
bool ReserveFlatScr = true;
Optional<bool> EnableWavefrontSize32;
@@ -4617,6 +4661,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
return OutOfRangeError(ValRange);
KD.kernarg_size = Val;
+ } else if (ID == ".amdhsa_user_sgpr_count") {
+ ExplicitUserSGPRCount = Val;
} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
@@ -4626,31 +4672,31 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
Val, ValRange);
if (Val)
- UserSGPRCount += 4;
+ ImpliedUserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
Val, ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
@@ -4660,13 +4706,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
ValRange);
if (Val)
- UserSGPRCount += 2;
+ ImpliedUserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
if (Val)
- UserSGPRCount += 1;
+ ImpliedUserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
if (IVersion.Major < 10)
return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4850,6 +4896,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
SGPRBlocks);
+ if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
+ return TokError("amdgpu_user_sgpr_count smaller than than implied by "
+ "enabled user SGPRs");
+
+ unsigned UserSGPRCount =
+ ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
+
if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
return TokError("too many user SGPRs enabled");
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
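
With the new .amdhsa_user_sgpr_count directive, the parser accumulates the count implied by the enabled user-SGPR features (4 for the private segment buffer, 2 each for the pointer pairs, 1 for the private segment size) and only accepts an explicit count at least that large. A minimal sketch of the resolution step:

    #include <cassert>
    #include <optional>

    static std::optional<unsigned>
    resolveUserSGPRCount(unsigned Implied, std::optional<unsigned> Explicit) {
      if (Explicit && Implied > *Explicit)
        return std::nullopt;                 // "smaller than implied" -> error
      return Explicit ? *Explicit : Implied; // explicit value may pad upward
    }

    int main() {
      assert(resolveUserSGPRCount(6, std::nullopt) == 6u);
      assert(resolveUserSGPRCount(6, 8u) == 8u);
      assert(!resolveUserSGPRCount(6, 4u));
      return 0;
    }
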
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 104b5160b985..c4043177b618 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -89,7 +89,6 @@ class DS_Real <DS_Pseudo ps> :
!if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
}
-
// DS Pseudo instructions
class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index c7ec5308e6d0..c530d3cb49f0 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -915,7 +915,7 @@ class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueT
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : GCNPat <
(vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
- (inst $vaddr, $data, $offset)
+ (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 0f8dd0b3bf58..c0592f6f3c7a 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -95,7 +95,9 @@ static bool isDGEMM(unsigned Opcode) {
return Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64 ||
Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_e64 ||
- Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64;
+ Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64 ||
+ Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64 ||
+ Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64;
}
static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
@@ -1438,7 +1440,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
if (!Use.isReg())
continue;
- unsigned Reg = Use.getReg();
+ Register Reg = Use.getReg();
bool FullReg;
const MachineInstr *MI1;
@@ -1477,6 +1479,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
switch (Opc1) {
case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
if (!isXDL(ST, *MI))
NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
break;
@@ -1509,6 +1513,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
switch (Opc1) {
case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
break;
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 162121c2c525..716bc027a894 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -25,7 +25,6 @@ class MachineFunction;
class MachineInstr;
class MachineOperand;
class MachineRegisterInfo;
-class ScheduleDAG;
class SIInstrInfo;
class SIRegisterInfo;
class GCNSubtarget;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 82c09378acac..fb106d98c162 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -27,7 +27,7 @@ void llvm::printLivesAt(SlotIndex SI,
<< *LIS.getInstructionFromIndex(SI);
unsigned Num = 0;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- const unsigned Reg = Register::index2VirtReg(I);
+ const Register Reg = Register::index2VirtReg(I);
if (!LIS.hasInterval(Reg))
continue;
const auto &LI = LIS.getInterval(Reg);
@@ -487,7 +487,7 @@ void GCNRPTracker::printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs,
const MachineRegisterInfo &MRI) {
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
auto It = LiveRegs.find(Reg);
if (It != LiveRegs.end() && It->second.any())
OS << ' ' << printVRegOrUnit(Reg, TRI) << ':'
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 53d6ff0aa731..a6e42ad3dfca 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -140,4 +140,4 @@ public:
} // End namespace llvm
-#endif // GCNSCHEDSTRATEGY_H
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index d8bc0b2df2bd..0cd2cfa2f0e7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -153,7 +153,6 @@ protected:
bool HasGetWaveIdInst;
bool HasSMemTimeInst;
bool HasShaderCyclesRegister;
- bool HasRegisterBanking;
bool HasVOP3Literal;
bool HasNoDataDepHazard;
bool FlatAddressSpace;
@@ -723,10 +722,6 @@ public:
return HasShaderCyclesRegister;
}
- bool hasRegisterBanking() const {
- return HasRegisterBanking;
- }
-
bool hasVOP3Literal() const {
return HasVOP3Literal;
}
@@ -1029,7 +1024,7 @@ public:
/// \returns Reserved number of SGPRs. This is common
/// utility function called by MachineFunction and
/// Function variants of getReservedNumSGPRs.
- unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const;
+ unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
/// \returns Reserved number of SGPRs for given machine function \p MF.
unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index b68b4b12e750..76663b563150 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1397,21 +1397,26 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
unsigned Vmcnt, Expcnt, Lgkmcnt;
decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
+ bool IsDefaultVmcnt = Vmcnt == getVmcntBitMask(ISA);
+ bool IsDefaultExpcnt = Expcnt == getExpcntBitMask(ISA);
+ bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA);
+ bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt;
+
bool NeedSpace = false;
- if (Vmcnt != getVmcntBitMask(ISA)) {
+ if (!IsDefaultVmcnt || PrintAll) {
O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true;
}
- if (Expcnt != getExpcntBitMask(ISA)) {
+ if (!IsDefaultExpcnt || PrintAll) {
if (NeedSpace)
O << ' ';
O << "expcnt(" << Expcnt << ')';
NeedSpace = true;
}
- if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
+ if (!IsDefaultLgkmcnt || PrintAll) {
if (NeedSpace)
O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')';
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 7708579a4491..ded3fb7ab8d9 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -15,8 +15,7 @@
using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
- const MCTargetOptions &Options)
- : MCAsmInfoELF() {
+ const MCTargetOptions &Options) {
CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4;
StackGrowsUp = true;
HasSingleParameterDotFile = false;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 9a9a2c973f44..9578bdb0bad0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -319,6 +319,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
<< KD.private_segment_fixed_size << '\n';
OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
+ PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
+ compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+
if (!hasArchitectedFlatScratch(STI))
PRINT_FIELD(
OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 6dd886367302..cf03fd682143 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -131,6 +131,38 @@ def MIMGMIPMappingTable : GenericTable {
let PrimaryKeyName = "getMIMGMIPMappingInfo";
}
+class MIMGBiasMapping<MIMGBaseOpcode bias, MIMGBaseOpcode nobias> {
+ MIMGBaseOpcode Bias = bias;
+ MIMGBaseOpcode NoBias = nobias;
+}
+
+def MIMGBiasMappingTable : GenericTable {
+ let FilterClass = "MIMGBiasMapping";
+ let CppTypeName = "MIMGBiasMappingInfo";
+ let Fields = ["Bias", "NoBias"];
+ string TypeOf_Bias = "MIMGBaseOpcode";
+ string TypeOf_NoBias = "MIMGBaseOpcode";
+
+ let PrimaryKey = ["Bias"];
+ let PrimaryKeyName = "getMIMGBiasMappingInfo";
+}
+
+class MIMGOffsetMapping<MIMGBaseOpcode offset, MIMGBaseOpcode nooffset> {
+ MIMGBaseOpcode Offset = offset;
+ MIMGBaseOpcode NoOffset = nooffset;
+}
+
+def MIMGOffsetMappingTable : GenericTable {
+ let FilterClass = "MIMGOffsetMapping";
+ let CppTypeName = "MIMGOffsetMappingInfo";
+ let Fields = ["Offset", "NoOffset"];
+ string TypeOf_Offset = "MIMGBaseOpcode";
+ string TypeOf_NoOffset = "MIMGBaseOpcode";
+
+ let PrimaryKey = ["Offset"];
+ let PrimaryKeyName = "getMIMGOffsetMappingInfo";
+}
+
class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
MIMGBaseOpcode G = g;
MIMGBaseOpcode G16 = g16;
@@ -1070,6 +1102,9 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
AMDGPUDimProps Dim = I.P.Dim;
AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval<I.P>;
+ bits<8> NumOffsetArgs = DimEval.NumOffsetArgs;
+ bits<8> NumBiasArgs = DimEval.NumBiasArgs;
+ bits<8> NumZCompareArgs = DimEval.NumZCompareArgs;
bits<8> NumGradients = DimEval.NumGradientArgs;
bits<8> NumDmask = DimEval.NumDmaskArgs;
bits<8> NumData = DimEval.NumDataArgs;
@@ -1078,6 +1113,9 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
bits<8> DMaskIndex = DimEval.DmaskArgIndex;
bits<8> VAddrStart = DimEval.VAddrArgIndex;
+ bits<8> OffsetIndex = DimEval.OffsetArgIndex;
+ bits<8> BiasIndex = DimEval.BiasArgIndex;
+ bits<8> ZCompareIndex = DimEval.ZCompareArgIndex;
bits<8> GradientStart = DimEval.GradientArgIndex;
bits<8> CoordStart = DimEval.CoordArgIndex;
bits<8> LodIndex = DimEval.LodArgIndex;
@@ -1089,6 +1127,8 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
bits<8> TexFailCtrlIndex = DimEval.TexFailCtrlArgIndex;
bits<8> CachePolicyIndex = DimEval.CachePolicyArgIndex;
+ bits<8> BiasTyArg = !add(I.P.NumRetAndDataAnyTypes,
+ !if(!eq(NumOffsetArgs, 0), 0, I.P.ExtraAddrArgs[0].Type.isAny));
bits<8> GradientTyArg = !add(I.P.NumRetAndDataAnyTypes,
!foldl(0, I.P.ExtraAddrArgs, cnt, arg, !add(cnt, arg.Type.isAny)));
bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0));
@@ -1096,10 +1136,10 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
def ImageDimIntrinsicTable : GenericTable {
let FilterClass = "ImageDimIntrinsicInfo";
- let Fields = ["Intr", "BaseOpcode", "Dim", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
- "DMaskIndex", "VAddrStart", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
+ let Fields = ["Intr", "BaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
+ "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
"RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
- "GradientTyArg", "CoordTyArg"];
+ "BiasTyArg", "GradientTyArg", "CoordTyArg"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
string TypeOf_Dim = "MIMGDim";
@@ -1132,6 +1172,66 @@ def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
+// Bias to NoBias Optimization Mapping
+def : MIMGBiasMapping<IMAGE_SAMPLE_B, IMAGE_SAMPLE>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL, IMAGE_SAMPLE_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B, IMAGE_SAMPLE_C>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL, IMAGE_SAMPLE_C_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_CL_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B, IMAGE_GATHER4>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL, IMAGE_GATHER4_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B, IMAGE_GATHER4_C>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL, IMAGE_GATHER4_C_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>;
+
+// Offset to NoOffset Optimization Mapping
+def : MIMGOffsetMapping<IMAGE_SAMPLE_O, IMAGE_SAMPLE>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CL_O, IMAGE_SAMPLE_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O_G16, IMAGE_SAMPLE_D_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O_G16, IMAGE_SAMPLE_D_CL_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_L_O, IMAGE_SAMPLE_L>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_B>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_B_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_LZ_O, IMAGE_SAMPLE_LZ>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_O, IMAGE_SAMPLE_C>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CL_O, IMAGE_SAMPLE_C_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O, IMAGE_SAMPLE_C_D_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O_G16, IMAGE_SAMPLE_C_D_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O_G16, IMAGE_SAMPLE_C_D_CL_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_L_O, IMAGE_SAMPLE_C_L>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_B_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_B>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_LZ_O, IMAGE_SAMPLE_C_LZ>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_O, IMAGE_GATHER4>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_CL_O, IMAGE_GATHER4_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_L>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_B>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_B_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_LZ_O, IMAGE_GATHER4_LZ>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_O, IMAGE_GATHER4_C>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_CL_O, IMAGE_GATHER4_C_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_L>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_B>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_B_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_LZ_O, IMAGE_GATHER4_C_LZ>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O, IMAGE_SAMPLE_CD>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O, IMAGE_SAMPLE_CD_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O, IMAGE_SAMPLE_C_CD>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O, IMAGE_SAMPLE_C_CD_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_G16, IMAGE_SAMPLE_CD_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_G16, IMAGE_SAMPLE_CD_CL_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_G16, IMAGE_SAMPLE_C_CD_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_G16, IMAGE_SAMPLE_C_CD_CL_G16>;
+
// G to G16 Optimization Mapping
def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index f9a9a6127322..1e75a0432ec3 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -19,7 +19,6 @@
namespace llvm {
-class R600InstrInfo;
class R600Subtarget;
class R600TargetLowering final : public AMDGPUTargetLowering {
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index fc567f1a1fca..bc8a4786df77 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -29,7 +29,6 @@ enum : uint64_t {
};
}
-class AMDGPUTargetMachine;
class DFAPacketizer;
class MachineFunction;
class MachineInstr;
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index 94403b88f21a..92d559b1f8e6 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -21,12 +21,6 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-namespace llvm {
-
-class MCInstrInfo;
-
-} // namespace llvm
-
#define GET_SUBTARGETINFO_HEADER
#include "R600GenSubtargetInfo.inc"
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 397b2f873515..b81fac36fc95 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -245,6 +245,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
return CallInst::Create(IfBreak, Args, "", Insert);
}
+ if (isa<Argument>(Cond)) {
+ Instruction *Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
+ Value *Args[] = { Cond, Broken };
+ return CallInst::Create(IfBreak, Args, "", Insert);
+ }
+
llvm_unreachable("Unhandled loop condition!");
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 580e4bc417a4..107ee5ed5532 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -379,6 +379,8 @@ enum Id { // HwRegCode, (6) [5:0]
ID_FLAT_SCR_LO = 20,
ID_FLAT_SCR_HI = 21,
ID_XNACK_MASK = 22,
+ ID_HW_ID1 = 23,
+ ID_HW_ID2 = 24,
ID_POPS_PACKER = 25,
ID_SHADER_CYCLES = 29,
ID_SYMBOLIC_FIRST_GFX1030_ = ID_SHADER_CYCLES,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1f93284fc7ee..33954e11d6c6 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -300,6 +300,13 @@ static bool updateOperand(FoldCandidate &Fold,
assert(!Fold.needsShrink() && "not handled");
if (Fold.isImm()) {
+ if (Old.isTied()) {
+ int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(MI->getOpcode());
+ if (NewMFMAOpc == -1)
+ return false;
+ MI->setDesc(TII.get(NewMFMAOpc));
+ MI->untieRegOperand(0);
+ }
Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d4fe74ecb96e..6078f4a0577a 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1195,7 +1195,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
} else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
TII->isLoadFromStackSlot(MI, FrameIndex))
- NonVGPRSpillFIs.set(FrameIndex);
+ if (!MFI.isFixedObjectIndex(FrameIndex))
+ NonVGPRSpillFIs.set(FrameIndex);
}
}
@@ -1320,16 +1321,14 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
const BitVector AllSavedRegs = SavedRegs;
SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
- // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
- const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
-
// We have to anticipate introducing CSR VGPR spills or spill of caller
// save VGPR reserved for SGPR spills as we now always create stack entry
- // for it, if we don't have any stack objects already, since we require
- // an FP if there is a call and stack.
+ // for it, if we don't have any stack objects already, since we require an
+ // FP if there is a call and stack. We will allocate a VGPR for SGPR spills
+ // if there are any SGPR spills, whether they are CSR spills or otherwise.
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
const bool WillHaveFP =
- FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);
+ FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
// FP will be specially managed like SP.
if (WillHaveFP || hasFP(MF))
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 56fbb875ffd9..7949dcfa6632 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -13,11 +13,6 @@
namespace llvm {
-class SIInstrInfo;
-class SIMachineFunctionInfo;
-class SIRegisterInfo;
-class GCNSubtarget;
-
class SIFrameLowering final : public AMDGPUFrameLowering {
public:
SIFrameLowering(StackDirection D, Align StackAl, int LAO,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9f138136e6e9..561866b5a398 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -45,10 +45,6 @@ static cl::opt<bool> DisableLoopAlignment(
cl::desc("Do not align and prefetch loops"),
cl::init(false));
-static cl::opt<bool> VGPRReserveforSGPRSpill(
- "amdgpu-reserve-vgpr-for-sgpr-spill",
- cl::desc("Allocates one VGPR for future SGPR Spill"), cl::init(true));
-
static cl::opt<bool> UseDivergentRegisterIndexing(
"amdgpu-use-divergent-register-indexing",
cl::Hidden,
@@ -138,6 +134,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass);
+ addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -273,7 +271,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64,
- MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32 }) {
+ MVT::v8i16, MVT::v8f16, MVT::v16i64, MVT::v16f64,
+ MVT::v32i32, MVT::v32f32 }) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -615,7 +614,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (STI.hasMadF16())
setOperationAction(ISD::FMAD, MVT::f16, Legal);
- for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) {
+ for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
+ MVT::v8f16}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -677,6 +677,21 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v4f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
+ setOperationAction(ISD::LOAD, MVT::v8i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v8i16, MVT::v4i32);
+ setOperationAction(ISD::LOAD, MVT::v8f16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v8f16, MVT::v4i32);
+
+ setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::v2i32);
+ setOperationAction(ISD::STORE, MVT::v4f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
+
+ setOperationAction(ISD::STORE, MVT::v8i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v8i16, MVT::v4i32);
+ setOperationAction(ISD::STORE, MVT::v8f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32);
+
setOperationAction(ISD::ANY_EXTEND, MVT::v2i32, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
@@ -686,6 +701,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Expand);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Expand);
+
if (!Subtarget->hasVOP3PInsts()) {
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
@@ -703,9 +722,20 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::v8f16, Custom);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::v8f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::v8f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::v8f16, Expand);
+
+ for (MVT Vec16 : { MVT::v8i16, MVT::v8f16 }) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, Vec16, Expand);
+ }
}
if (Subtarget->hasVOP3PInsts()) {
@@ -739,34 +769,42 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
- setOperationAction(ISD::SHL, MVT::v4i16, Custom);
- setOperationAction(ISD::SRA, MVT::v4i16, Custom);
- setOperationAction(ISD::SRL, MVT::v4i16, Custom);
- setOperationAction(ISD::ADD, MVT::v4i16, Custom);
- setOperationAction(ISD::SUB, MVT::v4i16, Custom);
- setOperationAction(ISD::MUL, MVT::v4i16, Custom);
+ for (MVT VT : { MVT::v4i16, MVT::v8i16 }) {
+ // Split vector operations.
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::SUB, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
- setOperationAction(ISD::SMIN, MVT::v4i16, Custom);
- setOperationAction(ISD::SMAX, MVT::v4i16, Custom);
- setOperationAction(ISD::UMIN, MVT::v4i16, Custom);
- setOperationAction(ISD::UMAX, MVT::v4i16, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::UADDSAT, MVT::v4i16, Custom);
- setOperationAction(ISD::SADDSAT, MVT::v4i16, Custom);
- setOperationAction(ISD::USUBSAT, MVT::v4i16, Custom);
- setOperationAction(ISD::SSUBSAT, MVT::v4i16, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
+ }
- setOperationAction(ISD::FADD, MVT::v4f16, Custom);
- setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
- setOperationAction(ISD::FMA, MVT::v4f16, Custom);
+ for (MVT VT : { MVT::v4f16, MVT::v8f16 }) {
+ // Split vector operations.
+ setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FMUL, VT, Custom);
+ setOperationAction(ISD::FMA, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ }
setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
@@ -803,7 +841,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FABS, MVT::v2f16, Custom);
}
- for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8 }) {
+ for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
+ MVT::v8i16, MVT::v8f16 }) {
setOperationAction(ISD::SELECT, VT, Custom);
}
@@ -2776,6 +2815,7 @@ void SITargetLowering::passSpecialInputs(
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
+ const Function &F = DAG.getMachineFunction().getFunction();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
@@ -2887,11 +2927,16 @@ void SITargetLowering::passSpecialInputs(
// If incoming ids are not packed we need to pack them.
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
- NeedWorkItemIDX)
- InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
+ NeedWorkItemIDX) {
+ if (Subtarget->getMaxWorkitemID(F, 0) != 0) {
+ InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
+ } else {
+ InputReg = DAG.getConstant(0, DL, MVT::i32);
+ }
+ }
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
- NeedWorkItemIDY) {
+ NeedWorkItemIDY && Subtarget->getMaxWorkitemID(F, 1) != 0) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
DAG.getShiftAmountConstant(10, MVT::i32, SL));
@@ -2900,7 +2945,7 @@ void SITargetLowering::passSpecialInputs(
}
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
- NeedWorkItemIDZ) {
+ NeedWorkItemIDZ && Subtarget->getMaxWorkitemID(F, 2) != 0) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2909,13 +2954,21 @@ void SITargetLowering::passSpecialInputs(
}
if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
- // Workitem ids are already packed, any of present incoming arguments
- // will carry all required fields.
- ArgDescriptor IncomingArg = ArgDescriptor::createArg(
- IncomingArgX ? *IncomingArgX :
- IncomingArgY ? *IncomingArgY :
- *IncomingArgZ, ~0u);
- InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, IncomingArg);
+ if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
+ // We're in a situation where the outgoing function requires the workitem
+ // ID, but the calling function does not have it (e.g. a graphics function
+ // calling a C calling convention function). This is illegal, but we need
+ // to produce something.
+ InputReg = DAG.getUNDEF(MVT::i32);
+ } else {
+ // Workitem ids are already packed, any of present incoming arguments
+ // will carry all required fields.
+ ArgDescriptor IncomingArg = ArgDescriptor::createArg(
+ IncomingArgX ? *IncomingArgX :
+ IncomingArgY ? *IncomingArgY :
+ *IncomingArgZ, ~0u);
+ InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, IncomingArg);
+ }
}
if (OutgoingArg->isRegister()) {
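
For context on the hunks above: the three workitem IDs share a single 32-bit input register, with Y and Z shifted into place by the SHL amounts of 10 and 20 seen here, so a dimension whose maximum workitem ID is 0 can simply be folded to a constant (or the whole register left undef when no incoming argument exists). A standalone sketch of that packed layout, assuming 10-bit fields and independent of the SelectionDAG types:

#include <cstdint>

// Packed workitem-ID layout implied by the shift amounts above (assumed
// 10-bit fields): X in bits [9:0], Y in [19:10], Z in [29:20].
static uint32_t packWorkitemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  return (X & 0x3ff) | ((Y & 0x3ff) << 10) | ((Z & 0x3ff) << 20);
}

static uint32_t unpackWorkitemID(uint32_t Packed, unsigned Dim) {
  return (Packed >> (10 * Dim)) & 0x3ff; // Dim: 0 = X, 1 = Y, 2 = Z
}
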
@@ -4600,7 +4653,8 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
- VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32);
+ VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8f32 ||
+ VT == MVT::v16f32 || VT == MVT::v32f32);
SDValue Lo0, Hi0;
std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4621,21 +4675,26 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
- assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
- VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32);
+ assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
+ VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v8f32 ||
+ VT == MVT::v16f32 || VT == MVT::v32f32);
SDValue Lo0, Hi0;
- std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
+ SDValue Op0 = Op.getOperand(0);
+ std::tie(Lo0, Hi0) = Op0.getValueType().isVector()
+ ? DAG.SplitVectorOperand(Op.getNode(), 0)
+ : std::make_pair(Op0, Op0);
SDValue Lo1, Hi1;
std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
SDValue Lo2, Hi2;
std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
SDLoc SL(Op);
+ auto ResVT = DAG.GetSplitDestVTs(VT);
- SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1, Lo2,
+ SDValue OpLo = DAG.getNode(Opc, SL, ResVT.first, Lo0, Lo1, Lo2,
Op->getFlags());
- SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1, Hi2,
+ SDValue OpHi = DAG.getNode(Opc, SL, ResVT.second, Hi0, Hi1, Hi2,
Op->getFlags());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
@@ -5297,7 +5356,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
if (IsIEEEMode)
return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
- if (VT == MVT::v4f16)
+ if (VT == MVT::v4f16 || VT == MVT::v8f16)
return splitBinaryVectorOp(Op, DAG);
return Op;
}
@@ -5501,6 +5560,22 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
MachineMemOperand::MOInvariant);
}
+/// Return true if the value is a known valid address, such that a null check is
+/// not necessary.
+static bool isKnownNonNull(SDValue Val, SelectionDAG &DAG,
+ const AMDGPUTargetMachine &TM, unsigned AddrSpace) {
+ if (isa<FrameIndexSDNode>(Val) || isa<GlobalAddressSDNode>(Val) ||
+ isa<BasicBlockSDNode>(Val))
+ return true;
+
+ if (auto *ConstVal = dyn_cast<ConstantSDNode>(Val))
+ return ConstVal->getSExtValue() != TM.getNullPointerValue(AddrSpace);
+
+ // TODO: Search through arithmetic, handle arguments and loads
+ // marked nonnull.
+ return false;
+}
+
SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -5508,48 +5583,64 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
SDValue Src = ASC->getOperand(0);
SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64);
+ unsigned SrcAS = ASC->getSrcAddressSpace();
const AMDGPUTargetMachine &TM =
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
+ SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
+
+ if (isKnownNonNull(Src, DAG, TM, SrcAS))
+ return Ptr;
+
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
- SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
- return DAG.getNode(ISD::SELECT, SL, MVT::i32,
- NonNull, Ptr, SegmentNullPtr);
+ return DAG.getNode(ISD::SELECT, SL, MVT::i32, NonNull, Ptr,
+ SegmentNullPtr);
}
}
// local/private -> flat
if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
- unsigned SrcAS = ASC->getSrcAddressSpace();
-
if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
+
+ SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
+ SDValue CvtPtr =
+ DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
+ CvtPtr = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
+
+ if (isKnownNonNull(Src, DAG, TM, SrcAS))
+ return CvtPtr;
+
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
- SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
- SDValue CvtPtr
- = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
-
- return DAG.getNode(ISD::SELECT, SL, MVT::i64, NonNull,
- DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr),
+ return DAG.getNode(ISD::SELECT, SL, MVT::i64, NonNull, CvtPtr,
FlatNullPtr);
}
}
+ if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+ Op.getValueType() == MVT::i64) {
+ const SIMachineFunctionInfo *Info =
+ DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+ SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32);
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi);
+ return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+ }
+
if (ASC->getDestAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
Src.getValueType() == MVT::i64)
return DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
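
The new CONSTANT_ADDRESS_32BIT source case above widens a 32-bit pointer to a 64-bit one by pairing it with the function's recorded high bits; element 0 of the v2i32 BUILD_VECTOR becomes the low half of the i64. Viewed as plain integer arithmetic, the cast amounts to the following sketch (illustration only):

#include <cstdint>

// Low 32 bits come from the 32-bit source pointer, high 32 bits from the
// value returned by get32BitAddressHighBits() in the hunk above.
static uint64_t widen32BitAddress(uint32_t Src, uint32_t HighBits) {
  return (uint64_t(HighBits) << 32) | Src;
}
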
@@ -5676,7 +5767,6 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
EVT VecVT = Vec.getValueType();
unsigned VecSize = VecVT.getSizeInBits();
EVT EltVT = VecVT.getVectorElementType();
- assert(VecSize <= 64);
DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
@@ -5687,6 +5777,28 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
return Combined;
+ if (VecSize == 128) {
+ SDValue Lo, Hi;
+ EVT LoVT, HiVT;
+ SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
+ Lo =
+ DAG.getBitcast(LoVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
+ V2, DAG.getConstant(0, SL, MVT::i32)));
+ Hi =
+ DAG.getBitcast(HiVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
+ V2, DAG.getConstant(1, SL, MVT::i32)));
+ EVT IdxVT = Idx.getValueType();
+ unsigned NElem = VecVT.getVectorNumElements();
+ assert(isPowerOf2_32(NElem));
+ SDValue IdxMask = DAG.getConstant(NElem / 2 - 1, SL, IdxVT);
+ SDValue NewIdx = DAG.getNode(ISD::AND, SL, IdxVT, Idx, IdxMask);
+ SDValue Half = DAG.getSelectCC(SL, Idx, IdxMask, Hi, Lo, ISD::SETUGT);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Half, NewIdx);
+ }
+
+ assert(VecSize <= 64);
+
unsigned EltSize = EltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize));
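
The new 128-bit path in lowerEXTRACT_VECTOR_ELT above bitcasts the vector to v2i64, chooses one 64-bit half with an unsigned compare of the index against NElem/2 - 1, and masks the index into that half. The same index math in a standalone form, with a std::array standing in for the DAG vector (illustration only):

#include <array>
#include <cstddef>

// Split the vector into two halves, pick the half by comparing the index
// against NElem/2 - 1 (the SETUGT select above), and mask the index into it.
template <typename T, std::size_t N>
static T extractViaHalves(const std::array<T, N> &Vec, unsigned Idx) {
  static_assert(N >= 2 && (N & (N - 1)) == 0, "power-of-2 element count");
  const unsigned IdxMask = N / 2 - 1;
  const unsigned NewIdx = Idx & IdxMask; // index within the chosen half
  const bool UseHi = Idx > IdxMask;
  return UseHi ? Vec[N / 2 + NewIdx] : Vec[NewIdx];
}
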
@@ -5769,20 +5881,27 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SDLoc SL(Op);
EVT VT = Op.getValueType();
- if (VT == MVT::v4i16 || VT == MVT::v4f16) {
- EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), 2);
+ if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8i16 || VT == MVT::v8f16) {
+ EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
+ VT.getVectorNumElements() / 2);
+ MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits());
// Turn into pair of packed build_vectors.
// TODO: Special case for constants that can be materialized with s_mov_b64.
- SDValue Lo = DAG.getBuildVector(HalfVT, SL,
- { Op.getOperand(0), Op.getOperand(1) });
- SDValue Hi = DAG.getBuildVector(HalfVT, SL,
- { Op.getOperand(2), Op.getOperand(3) });
+ SmallVector<SDValue, 4> LoOps, HiOps;
+ for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I != E; ++I) {
+ LoOps.push_back(Op.getOperand(I));
+ HiOps.push_back(Op.getOperand(I + E));
+ }
+ SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps);
+ SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps);
- SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Lo);
- SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Hi);
+ SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo);
+ SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi);
- SDValue Blend = DAG.getBuildVector(MVT::v2i32, SL, { CastLo, CastHi });
+ SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL,
+ { CastLo, CastHi });
return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
@@ -6155,10 +6274,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
- const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
- AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
- const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
- AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
@@ -6246,28 +6361,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;
SmallVector<SDValue, 4> VAddrs;
- // Optimize _L to _LZ when _L is zero
- if (LZMappingInfo) {
- if (auto *ConstantLod = dyn_cast<ConstantFPSDNode>(
- Op.getOperand(ArgOffset + Intr->LodIndex))) {
- if (ConstantLod->isZero() || ConstantLod->isNegative()) {
- IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- VAddrEnd--; // remove 'lod'
- }
- }
- }
-
- // Optimize _mip away, when 'lod' is zero
- if (MIPMappingInfo) {
- if (auto *ConstantLod = dyn_cast<ConstantSDNode>(
- Op.getOperand(ArgOffset + Intr->MipIndex))) {
- if (ConstantLod->isZero()) {
- IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
- VAddrEnd--; // remove 'mip'
- }
- }
- }
-
// Check for 16 bit addresses or derivatives and pack if true.
MVT VAddrVT =
Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
@@ -6283,12 +6376,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
// Push back extra arguments.
for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {
if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {
+ assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");
// Special handling of bias when A16 is on. Bias is of type half but
// occupies full 32-bit.
- SDValue bias = DAG.getBuildVector( MVT::v2f16, DL, {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
- VAddrs.push_back(bias);
- } else
+ SDValue Bias = DAG.getBuildVector(
+ MVT::v2f16, DL,
+ {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
+ VAddrs.push_back(Bias);
+ } else {
+ assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) &&
+ "Bias needs to be converted to 16 bit in A16 mode");
VAddrs.push_back(Op.getOperand(ArgOffset + I));
+ }
}
if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) {
@@ -6731,14 +6830,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_workitem_id_x:
+ if (Subtarget->getMaxWorkitemID(MF.getFunction(), 0) == 0)
+ return DAG.getConstant(0, DL, MVT::i32);
+
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
+ if (Subtarget->getMaxWorkitemID(MF.getFunction(), 1) == 0)
+ return DAG.getConstant(0, DL, MVT::i32);
+
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
+ if (Subtarget->getMaxWorkitemID(MF.getFunction(), 2) == 0)
+ return DAG.getConstant(0, DL, MVT::i32);
+
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
@@ -6899,9 +7007,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(1, SL, MVT::i32));
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
}
- case Intrinsic::amdgcn_alignbit:
- return DAG.getNode(ISD::FSHR, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_perm:
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
@@ -8408,21 +8513,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+ if (VT.getSizeInBits() == 128)
+ return splitTernaryVectorOp(Op, DAG);
+
assert(VT.getSizeInBits() == 64);
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
- if (Subtarget->hasScalarCompareEq64() && Op->getOperand(0)->hasOneUse() &&
- !Op->isDivergent()) {
- if (VT == MVT::i64)
- return Op;
- SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(1));
- SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(2));
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getSelect(DL, MVT::i64, Cond, LHS, RHS));
- }
-
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
@@ -9550,6 +9648,9 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
SDValue SITargetLowering::performXorCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
+ if (SDValue RV = reassociateScalarOps(N, DCI.DAG))
+ return RV;
+
EVT VT = N->getValueType(0);
if (VT != MVT::i64)
return SDValue();
@@ -10462,6 +10563,9 @@ SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
+ if (DAG.isBaseWithConstantOffset(SDValue(N, 0)))
+ return SDValue();
+
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -10483,12 +10587,6 @@ SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
if (Op1->isDivergent())
std::swap(Op1, Op2);
- // If either operand is constant this will conflict with
- // DAGCombiner::ReassociateOps().
- if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
- DAG.isConstantIntBuildVectorOrConstantInt(Op1))
- return SDValue();
-
SDLoc SL(N);
SDValue Add1 = DAG.getNode(Opc, SL, VT, Op0, Op1);
return DAG.getNode(Opc, SL, VT, Add1, Op2);
@@ -11130,7 +11228,9 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
bool UsesTFC = ((int(TFEIdx) >= 0 && Node->getConstantOperandVal(TFEIdx)) ||
- Node->getConstantOperandVal(LWEIdx)) ? 1 : 0;
+ Node->getConstantOperandVal(LWEIdx))
+ ? true
+ : false;
unsigned TFCLane = 0;
bool HasChain = Node->getNumValues() > 1;
@@ -11719,25 +11819,51 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
return std::make_pair(0U, RC);
}
- if (Constraint.size() > 1) {
- if (Constraint[1] == 'v') {
+ if (Constraint.startswith("{") && Constraint.endswith("}")) {
+ StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
+ if (RegName.consume_front("v")) {
RC = &AMDGPU::VGPR_32RegClass;
- } else if (Constraint[1] == 's') {
+ } else if (RegName.consume_front("s")) {
RC = &AMDGPU::SGPR_32RegClass;
- } else if (Constraint[1] == 'a') {
+ } else if (RegName.consume_front("a")) {
RC = &AMDGPU::AGPR_32RegClass;
}
if (RC) {
uint32_t Idx;
- bool Failed = Constraint.substr(2).getAsInteger(10, Idx);
- if (!Failed && Idx < RC->getNumRegs())
- return std::make_pair(RC->getRegister(Idx), RC);
+ if (RegName.consume_front("[")) {
+ uint32_t End;
+ bool Failed = RegName.consumeInteger(10, Idx);
+ Failed |= !RegName.consume_front(":");
+ Failed |= RegName.consumeInteger(10, End);
+ Failed |= !RegName.consume_back("]");
+ if (!Failed) {
+ uint32_t Width = (End - Idx + 1) * 32;
+ MCRegister Reg = RC->getRegister(Idx);
+ if (SIRegisterInfo::isVGPRClass(RC))
+ RC = TRI->getVGPRClassForBitWidth(Width);
+ else if (SIRegisterInfo::isSGPRClass(RC))
+ RC = TRI->getSGPRClassForBitWidth(Width);
+ else if (SIRegisterInfo::isAGPRClass(RC))
+ RC = TRI->getAGPRClassForBitWidth(Width);
+ if (RC) {
+ Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
+ return std::make_pair(Reg, RC);
+ }
+ }
+ } else {
+ bool Failed = RegName.getAsInteger(10, Idx);
+ if (!Failed && Idx < RC->getNumRegs())
+ return std::make_pair(RC->getRegister(Idx), RC);
+ }
}
}
- // FIXME: Returns VS_32 for physical SGPR constraints
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ auto Ret = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ if (Ret.first)
+ Ret.second = TRI->getPhysRegClass(Ret.first);
+
+ return Ret;
}
static bool isImmConstraint(StringRef Constraint) {
@@ -11975,13 +12101,6 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
}
TargetLoweringBase::finalizeLowering(MF);
-
- // Allocate a VGPR for future SGPR Spill if
- // "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
- // FIXME: We won't need this hack if we split SGPR allocation from VGPR
- if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
- !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction())
- Info->reserveVGPRforSGPRSpills(MF);
}
void SITargetLowering::computeKnownBitsForFrameIndex(
@@ -12441,17 +12560,10 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
for (auto &TC : TargetConstraints) {
if (TC.Type == InlineAsm::isOutput) {
ComputeConstraintToUse(TC, SDValue());
- unsigned AssignedReg;
- const TargetRegisterClass *RC;
- std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
- SIRI, TC.ConstraintCode, TC.ConstraintVT);
- if (RC) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
- return true;
- else if (SIRI->isSGPRClass(RC))
- return true;
- }
+ const TargetRegisterClass *RC = getRegForInlineAsmConstraint(
+ SIRI, TC.ConstraintCode, TC.ConstraintVT).second;
+ if (RC && SIRI->isSGPRClass(RC))
+ return true;
}
}
}
@@ -12475,3 +12587,27 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
Cost.first += (Size + 255) / 256;
return Cost;
}
+
+bool SITargetLowering::hasMemSDNodeUser(SDNode *N) const {
+ SDNode::use_iterator I = N->use_begin(), E = N->use_end();
+ for (; I != E; ++I) {
+ if (MemSDNode *M = dyn_cast<MemSDNode>(*I)) {
+ if (getBasePtrIndex(M) == I.getOperandNo())
+ return true;
+ }
+ }
+ return false;
+}
+
+bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+ SDValue N1) const {
+ if (!N0.hasOneUse())
+ return false;
+ // Take care of the opportunity to keep N0 uniform
+ if (N0->isDivergent() || !N1->isDivergent())
+ return true;
+ // Check if we have a good chance to form the memory access pattern with the
+ // base and offset
+ return (DAG.isBaseWithConstantOffset(N0) &&
+ hasMemSDNodeUser(*N0->use_begin()));
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1315cc15dd02..bf81e082b478 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -449,6 +449,11 @@ public:
bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
+ bool hasMemSDNodeUser(SDNode *N) const;
+
+ bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+ SDValue N1) const override;
+
bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth = 5) const;
bool isCanonicalized(Register Reg, MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6fbe5d45ce0a..f8a10bc8ef6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -863,7 +863,7 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
Wait.ExpCnt = ~0u;
LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << MI << "New Instr: " << *WaitcntInstr
+ << "Old Instr: " << *MI << "New Instr: " << *WaitcntInstr
<< '\n');
} else {
WaitcntInstr->eraseFromParent();
@@ -886,7 +886,7 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
Wait.VsCnt = ~0u;
LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
- << "Old Instr: " << MI
+ << "Old Instr: " << *MI
<< "New Instr: " << *WaitcntVsCntInstr << '\n');
} else {
WaitcntVsCntInstr->eraseFromParent();
@@ -1382,7 +1382,6 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
for (auto T : inst_counter_types()) {
// Merge event flags for this counter
- const bool OldOutOfOrder = counterOutOfOrder(T);
const unsigned OldEvents = PendingEvents & WaitEventMaskForInst[T];
const unsigned OtherEvents = Other.PendingEvents & WaitEventMaskForInst[T];
if (OtherEvents & ~OldEvents)
@@ -1425,7 +1424,7 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
}
}
- if (RegStrictDom && !OldOutOfOrder)
+ if (RegStrictDom)
StrictDom = true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1755b93538ce..0a2f9381e71f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -130,10 +130,24 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
return false;
}
+static bool readsExecAsData(const MachineInstr &MI) {
+ if (MI.isCompare())
+ return true;
+
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AMDGPU::V_READFIRSTLANE_B32:
+ return true;
+ }
+
+ return false;
+}
+
bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
// Any implicit use of exec by VALU is not a real register read.
return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
- isVALU(*MO.getParent());
+ isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent());
}
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
@@ -3184,10 +3198,14 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
+ int NewMFMAOpc = -1;
switch (Opc) {
default:
- return nullptr;
+ NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
+ if (NewMFMAOpc == -1)
+ return nullptr;
+ break;
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_FMAC_F16_e64:
IsF16 = true;
@@ -3216,6 +3234,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
}
}
+ MachineInstrBuilder MIB;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ if (NewMFMAOpc != -1) {
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+ MIB.add(MI.getOperand(I));
+ updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ return MIB;
+ }
+
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand *Src0Mods =
@@ -3226,8 +3257,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
- MachineInstrBuilder MIB;
- MachineBasicBlock &MBB = *MI.getParent();
if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 &&
// If we have an SGPR input, we will violate the constant bus restriction.
@@ -4520,6 +4549,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
+ const MachineOperand &SrcOp = MI.getOperand(1);
+ if (!SrcOp.isReg() || SrcOp.getReg().isVirtual()) {
+ ErrInfo = "pseudo expects only physical SGPRs";
+ return false;
+ }
+ }
+
return true;
}
@@ -6122,11 +6159,8 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
case AMDGPU::S_CSELECT_B32:
- lowerSelect32(Worklist, Inst, MDT);
- Inst.eraseFromParent();
- continue;
case AMDGPU::S_CSELECT_B64:
- splitSelect64(Worklist, Inst, MDT);
+ lowerSelect(Worklist, Inst, MDT);
Inst.eraseFromParent();
continue;
case AMDGPU::S_CMP_EQ_I32:
@@ -6304,8 +6338,8 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
return std::make_pair(false, nullptr);
}
-void SIInstrInfo::lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT) const {
+void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -6380,95 +6414,6 @@ void SIInstrInfo::lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::splitSelect64(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT) const {
- // Split S_CSELECT_B64 into a pair of S_CSELECT_B32 and lower them
- // further.
- const DebugLoc &DL = Inst.getDebugLoc();
- MachineBasicBlock::iterator MII = Inst;
- MachineBasicBlock &MBB = *Inst.getParent();
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
- // Get the original operands.
- MachineOperand &Dest = Inst.getOperand(0);
- MachineOperand &Src0 = Inst.getOperand(1);
- MachineOperand &Src1 = Inst.getOperand(2);
- MachineOperand &Cond = Inst.getOperand(3);
-
- Register SCCSource = Cond.getReg();
- bool IsSCC = (SCCSource == AMDGPU::SCC);
-
- // If this is a trivial select where the condition is effectively not SCC
- // (SCCSource is a source of copy to SCC), then the select is semantically
- // equivalent to copying SCCSource. Hence, there is no need to create
- // V_CNDMASK, we can just use that and bail out.
- if (!IsSCC && (Src0.isImm() && Src0.getImm() == -1) &&
- (Src1.isImm() && Src1.getImm() == 0)) {
- MRI.replaceRegWith(Dest.getReg(), SCCSource);
- return;
- }
-
- // Prepare the split destination.
- Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- // Split the source operands.
- const TargetRegisterClass *Src0RC = nullptr;
- const TargetRegisterClass *Src0SubRC = nullptr;
- if (Src0.isReg()) {
- Src0RC = MRI.getRegClass(Src0.getReg());
- Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
- }
- const TargetRegisterClass *Src1RC = nullptr;
- const TargetRegisterClass *Src1SubRC = nullptr;
- if (Src1.isReg()) {
- Src1RC = MRI.getRegClass(Src1.getReg());
- Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
- }
- // Split lo.
- MachineOperand SrcReg0Sub0 =
- buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
- MachineOperand SrcReg1Sub0 =
- buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
- // Split hi.
- MachineOperand SrcReg0Sub1 =
- buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 =
- buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
- // Select the lo part.
- MachineInstr *LoHalf =
- BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub0)
- .add(SrcReg0Sub0)
- .add(SrcReg1Sub0);
- // Replace the condition operand with the original one.
- LoHalf->getOperand(3).setReg(SCCSource);
- Worklist.insert(LoHalf);
- // Select the hi part.
- MachineInstr *HiHalf =
- BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub1)
- .add(SrcReg0Sub1)
- .add(SrcReg1Sub1);
- // Replace the condition operand with the original one.
- HiHalf->getOperand(3).setReg(SCCSource);
- Worklist.insert(HiHalf);
- // Merge them back to the original 64-bit one.
- BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
- MRI.replaceRegWith(Dest.getReg(), FullDestReg);
-
- // Try to legalize the operands in case we need to swap the order to keep
- // it valid.
- legalizeOperands(*LoHalf, MDT);
- legalizeOperands(*HiHalf, MDT);
-
- // Move all users of this moved value.
- addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
-}
-
void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -7820,6 +7765,12 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
}
}
+ if (isMAI(Opcode)) {
+ int MFMAOp = AMDGPU::getMFMAEarlyClobberOp(Opcode);
+ if (MFMAOp != -1)
+ Opcode = MFMAOp;
+ }
+
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index dd9ea2b53ca2..e551d6c7223f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -78,11 +78,8 @@ private:
moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
- void lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT = nullptr) const;
-
- void splitSelect64(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT = nullptr) const;
+ void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -1249,6 +1246,10 @@ namespace AMDGPU {
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);
+ /// \returns earlyclobber version of a MAC MFMA if it exists.
+ LLVM_READONLY
+ int getMFMAEarlyClobberOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index dda92d3d25ff..713a08907e99 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2588,6 +2588,14 @@ def getFlatScratchInstSVfromSS : InstrMapping {
let ValueCols = [["SV"]];
}
+def getMFMAEarlyClobberOp : InstrMapping {
+ let FilterClass = "MFMATable";
+ let RowFields = ["FMAOp"];
+ let ColFields = ["IsMac"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 636337ede000..7be63ae6964b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1011,7 +1011,7 @@ def : GCNPat <
}
def : GCNPat <
- (i32 (ctpop i32:$popcnt)),
+ (i32 (DivergentUnaryFrag<ctpop> i32:$popcnt)),
(V_BCNT_U32_B32_e64 VSrc_b32:$popcnt, (i32 0))
>;
@@ -1020,6 +1020,14 @@ def : GCNPat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+def : GCNPat <
+ (i64 (DivergentUnaryFrag<ctpop> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BCNT_U32_B32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub1)),
+ (i32 (V_BCNT_U32_B32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0)))), sub0,
+ (i32 (V_MOV_B32_e32 (i32 0))), sub1)
+>;
+
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
@@ -1184,6 +1192,26 @@ def : Pat <
(v2f16 (EXTRACT_SUBREG v4f16:$vec, sub1))
>;
+def : Pat <
+ (extract_subvector v8i16:$vec, (i32 0)),
+ (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+ (extract_subvector v8i16:$vec, (i32 4)),
+ (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub2_sub3))
+>;
+
+def : Pat <
+ (extract_subvector v8f16:$vec, (i32 0)),
+ (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+ (extract_subvector v8f16:$vec, (i32 4)),
+ (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub2_sub3))
+>;
+
foreach Index = 0-31 in {
def Extract_Element_v32i32_#Index : Extract_Element <
i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -1279,6 +1307,26 @@ def : BitConvert <v2i64, v2f64, VReg_128>;
def : BitConvert <v2f64, v2i64, VReg_128>;
def : BitConvert <v4f32, v2i64, VReg_128>;
def : BitConvert <v2i64, v4f32, VReg_128>;
+def : BitConvert <v8i16, v4i32, SReg_128>;
+def : BitConvert <v4i32, v8i16, SReg_128>;
+def : BitConvert <v8f16, v4f32, VReg_128>;
+def : BitConvert <v8f16, v4i32, VReg_128>;
+def : BitConvert <v4f32, v8f16, VReg_128>;
+def : BitConvert <v4i32, v8f16, VReg_128>;
+def : BitConvert <v8i16, v8f16, VReg_128>;
+def : BitConvert <v8f16, v8i16, VReg_128>;
+def : BitConvert <v4f32, v8i16, VReg_128>;
+def : BitConvert <v8i16, v4f32, VReg_128>;
+def : BitConvert <v8i16, v8f16, SReg_128>;
+def : BitConvert <v8i16, v2i64, SReg_128>;
+def : BitConvert <v8i16, v2f64, SReg_128>;
+def : BitConvert <v8f16, v2i64, SReg_128>;
+def : BitConvert <v8f16, v2f64, SReg_128>;
+def : BitConvert <v8f16, v8i16, SReg_128>;
+def : BitConvert <v2i64, v8i16, SReg_128>;
+def : BitConvert <v2f64, v8i16, SReg_128>;
+def : BitConvert <v2i64, v8f16, SReg_128>;
+def : BitConvert <v2f64, v8f16, SReg_128>;
// 160-bit bitcast
def : BitConvert <v5i32, v5f32, SReg_160>;
@@ -1762,44 +1810,44 @@ def BFIImm32 : PatFrag<
// (y & x) | (z & ~x)
def : AMDGPUPat <
(DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
- (V_BFI_B32_e64 $x, $y, $z)
+ (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
>;
// (y & C) | (z & ~C)
def : AMDGPUPat <
(BFIImm32 i32:$x, i32:$y, i32:$z),
- (V_BFI_B32_e64 $x, $y, $z)
+ (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
>;
// 64-bit version
def : AMDGPUPat <
(DivergentBinFrag<or> (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
- (REG_SEQUENCE SReg_64,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;
// SHA-256 Ch function
// z ^ (x & (y ^ z))
def : AMDGPUPat <
(DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
- (V_BFI_B32_e64 $x, $y, $z)
+ (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
>;
// 64-bit version
def : AMDGPUPat <
(DivergentBinFrag<xor> i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
- (REG_SEQUENCE SReg_64,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
- (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+ (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;
def : AMDGPUPat <
@@ -2725,21 +2773,21 @@ def : AMDGPUPat <
def : AMDGPUPat <
(DivergentBinFrag<or> (and i32:$x, i32:$z),
(and i32:$y, (or i32:$x, i32:$z))),
- (V_BFI_B32_e64 (V_XOR_B32_e64 i32:$x, i32:$y), i32:$z, i32:$y)
+ (V_BFI_B32_e64 (V_XOR_B32_e64 VSrc_b32:$x, VSrc_b32:$y), VSrc_b32:$z, VSrc_b32:$y)
>;
def : AMDGPUPat <
(DivergentBinFrag<or> (and i64:$x, i64:$z),
(and i64:$y, (or i64:$x, i64:$z))),
- (REG_SEQUENCE SReg_64,
- (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub0)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))), sub0,
- (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))),
- (i32 (EXTRACT_SUBREG SReg_64:$z, sub1)),
- (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))), sub1)
+ (REG_SEQUENCE VReg_64,
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))), sub0,
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))), sub1)
>;
multiclass IntMed3Pat<Instruction med3Inst,
@@ -2825,6 +2873,15 @@ class AMDGPUGenericInstruction : GenericInstruction {
let Namespace = "AMDGPU";
}
+// Convert a wave address to a swizzled vector address (i.e. this is
+// for copying the stack pointer to a vector address appropriate to
+// use in the offset field of mubuf instructions).
+def G_AMDGPU_WAVE_ADDRESS : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
// Returns -1 if the input is zero.
def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
@@ -3027,6 +3084,16 @@ def G_AMDGPU_INTRIN_IMAGE_LOAD : AMDGPUGenericInstruction {
let mayStore = 1;
}
+def G_AMDGPU_INTRIN_IMAGE_LOAD_D16 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+
+ // FIXME: Use separate opcode for atomics.
+ let mayStore = 1;
+}
+
// This is equivalent to the G_INTRINSIC*, but the operands may have
// been legalized depending on the subtarget requirements.
def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction {
@@ -3036,6 +3103,13 @@ def G_AMDGPU_INTRIN_IMAGE_STORE : AMDGPUGenericInstruction {
let mayStore = 1;
}
+def G_AMDGPU_INTRIN_IMAGE_STORE_D16 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
+
def G_AMDGPU_INTRIN_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$intrin, variable_ops);
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f4d9002e930e..c18637bdbc43 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -105,6 +105,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
unsigned DMask;
InstClassEnum InstClass;
unsigned CPol = 0;
+ bool IsAGPR;
bool UseST64;
int AddrIdx[MaxAddressRegs];
const MachineOperand *AddrReg[MaxAddressRegs];
@@ -158,8 +159,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
return true;
}
- void setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII,
- const GCNSubtarget &STM);
+ void setMI(MachineBasicBlock::iterator MI, const SILoadStoreOptimizer &LSO);
};
struct BaseRegisters {
@@ -484,15 +484,16 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
}
void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
- const SIInstrInfo &TII,
- const GCNSubtarget &STM) {
+ const SILoadStoreOptimizer &LSO) {
I = MI;
unsigned Opc = MI->getOpcode();
- InstClass = getInstClass(Opc, TII);
+ InstClass = getInstClass(Opc, *LSO.TII);
if (InstClass == UNKNOWN)
return;
+ IsAGPR = LSO.TRI->hasAGPRs(LSO.getDataRegClass(*MI));
+
switch (InstClass) {
case DS_READ:
EltSize =
@@ -505,7 +506,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
: 4;
break;
case S_BUFFER_LOAD_IMM:
- EltSize = AMDGPU::convertSMRDOffsetUnits(STM, 4);
+ EltSize = AMDGPU::convertSMRDOffsetUnits(*LSO.STM, 4);
break;
default:
EltSize = 4;
@@ -513,7 +514,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
}
if (InstClass == MIMG) {
- DMask = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm();
+ DMask = LSO.TII->getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm();
// Offset is not considered for MIMG instructions.
Offset = 0;
} else {
@@ -522,17 +523,17 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
}
if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE)
- Format = TII.getNamedOperand(*I, AMDGPU::OpName::format)->getImm();
+ Format = LSO.TII->getNamedOperand(*I, AMDGPU::OpName::format)->getImm();
- Width = getOpcodeWidth(*I, TII);
+ Width = getOpcodeWidth(*I, *LSO.TII);
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
Offset &= 0xffff;
} else if (InstClass != MIMG) {
- CPol = TII.getNamedOperand(*I, AMDGPU::OpName::cpol)->getImm();
+ CPol = LSO.TII->getNamedOperand(*I, AMDGPU::OpName::cpol)->getImm();
}
- AddressRegs Regs = getRegs(Opc, TII);
+ AddressRegs Regs = getRegs(Opc, *LSO.TII);
NumAddresses = 0;
for (unsigned J = 0; J < Regs.NumVAddrs; J++)
@@ -910,19 +911,10 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
}
const unsigned InstSubclass = getInstSubclass(Opc, *TII);
- // Do not merge VMEM buffer instructions with "swizzled" bit set.
- int Swizzled =
- AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz);
- if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
- return false;
-
DenseSet<Register> RegDefsToMove;
DenseSet<Register> PhysRegUsesToMove;
addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
- const TargetRegisterClass *DataRC = getDataRegClass(*CI.I);
- bool IsAGPR = TRI->hasAGPRs(DataRC);
-
MachineBasicBlock::iterator E = std::next(Paired.I);
MachineBasicBlock::iterator MBBI = std::next(CI.I);
MachineBasicBlock::iterator MBBE = CI.I->getParent()->end();
@@ -971,15 +963,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
continue;
}
- // Don't merge volatiles.
- if (MBBI->hasOrderedMemoryRef())
- return false;
-
- int Swizzled =
- AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz);
- if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm())
- return false;
-
// Handle a case like
// DS_WRITE_B32 addr, v, idx0
// w = DS_READ_B32 addr, idx0
@@ -991,17 +974,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
continue;
if (&*MBBI == &*Paired.I) {
- if (TRI->hasAGPRs(getDataRegClass(*MBBI)) != IsAGPR)
- return false;
- // FIXME: nothing is illegal in a ds_write2 opcode with two AGPR data
- // operands. However we are reporting that ds_write2 shall have
- // only VGPR data so that machine copy propagation does not
- // create an illegal instruction with a VGPR and AGPR sources.
- // Consequenctially if we create such instruction the verifier
- // will complain.
- if (IsAGPR && CI.InstClass == DS_WRITE)
- return false;
-
// We need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
@@ -1542,49 +1514,36 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
std::pair<unsigned, unsigned>
SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
const CombineInfo &Paired) {
-
- assert(CI.Width != 0 && Paired.Width != 0 && "Width cannot be zero");
-
bool ReverseOrder;
if (CI.InstClass == MIMG) {
assert(
(countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
"No overlaps");
ReverseOrder = CI.DMask > Paired.DMask;
- } else
+ } else {
ReverseOrder = CI.Offset > Paired.Offset;
+ }
unsigned Idx0;
unsigned Idx1;
- if (CI.Width + Paired.Width > 4) {
- assert(CI.Width == 4 && Paired.Width == 4);
+ static const unsigned Idxs[5][4] = {
+ {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
+ {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4},
+ {AMDGPU::sub2, AMDGPU::sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5},
+ {AMDGPU::sub3, AMDGPU::sub3_sub4, AMDGPU::sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6},
+ {AMDGPU::sub4, AMDGPU::sub4_sub5, AMDGPU::sub4_sub5_sub6, AMDGPU::sub4_sub5_sub6_sub7},
+ };
- if (ReverseOrder) {
- Idx1 = AMDGPU::sub0_sub1_sub2_sub3;
- Idx0 = AMDGPU::sub4_sub5_sub6_sub7;
- } else {
- Idx0 = AMDGPU::sub0_sub1_sub2_sub3;
- Idx1 = AMDGPU::sub4_sub5_sub6_sub7;
- }
+ assert(CI.Width >= 1 && CI.Width <= 4);
+ assert(Paired.Width >= 1 && Paired.Width <= 4);
+
+ if (ReverseOrder) {
+ Idx1 = Idxs[0][Paired.Width - 1];
+ Idx0 = Idxs[Paired.Width][CI.Width - 1];
} else {
- static const unsigned Idxs[4][4] = {
- {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
- {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
- {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
- {AMDGPU::sub3, 0, 0, 0},
- };
-
- assert(CI.Width >= 1 && CI.Width <= 3);
- assert(Paired.Width >= 1 && Paired.Width <= 3);
-
- if (ReverseOrder) {
- Idx1 = Idxs[0][Paired.Width - 1];
- Idx0 = Idxs[Paired.Width][CI.Width - 1];
- } else {
- Idx0 = Idxs[0][CI.Width - 1];
- Idx1 = Idxs[CI.Width][Paired.Width - 1];
- }
+ Idx0 = Idxs[0][CI.Width - 1];
+ Idx1 = Idxs[CI.Width][Paired.Width - 1];
}
return std::make_pair(Idx0, Idx1);
@@ -1847,7 +1806,8 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
if (AMDGPU::getGlobalSaddrOp(MI.getOpcode()) < 0)
return false;
- if (MI.mayLoad() && TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != NULL)
+ if (MI.mayLoad() &&
+ TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
return false;
if (AnchorList.count(&MI))
@@ -1988,6 +1948,7 @@ void SILoadStoreOptimizer::addInstToMergeableList(const CombineInfo &CI,
std::list<std::list<CombineInfo> > &MergeableInsts) const {
for (std::list<CombineInfo> &AddrList : MergeableInsts) {
if (AddrList.front().InstClass == CI.InstClass &&
+ AddrList.front().IsAGPR == CI.IsAGPR &&
AddrList.front().hasSameBaseAddress(*CI.I)) {
AddrList.emplace_back(CI);
return;
@@ -2030,13 +1991,29 @@ SILoadStoreOptimizer::collectMergeableInsts(
if (InstClass == UNKNOWN)
continue;
+ // Do not merge VMEM buffer instructions with "swizzled" bit set.
+ int Swizzled =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
+ if (Swizzled != -1 && MI.getOperand(Swizzled).getImm())
+ continue;
+
CombineInfo CI;
- CI.setMI(MI, *TII, *STM);
+ CI.setMI(MI, *this);
CI.Order = Order++;
if (!CI.hasMergeableAddress(*MRI))
continue;
+ if (CI.InstClass == DS_WRITE && CI.IsAGPR) {
+ // FIXME: nothing is illegal in a ds_write2 opcode with two AGPR data
+ // operands. However we are reporting that ds_write2 shall have
+ // only VGPR data so that machine copy propagation does not
+      //        create an illegal instruction with VGPR and AGPR sources.
+      //        Consequently, if we create such an instruction, the verifier
+ // will complain.
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "Mergeable: " << MI);
addInstToMergeableList(CI, MergeableInsts);
@@ -2144,54 +2121,54 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
case DS_READ: {
MachineBasicBlock::iterator NewMI =
mergeRead2Pair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
break;
}
case DS_WRITE: {
MachineBasicBlock::iterator NewMI =
mergeWrite2Pair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
break;
}
case S_BUFFER_LOAD_IMM: {
MachineBasicBlock::iterator NewMI =
mergeSBufferLoadImmPair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
OptimizeListAgain |= (CI.Width + Paired.Width) < 8;
break;
}
case BUFFER_LOAD: {
MachineBasicBlock::iterator NewMI =
mergeBufferLoadPair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case BUFFER_STORE: {
MachineBasicBlock::iterator NewMI =
mergeBufferStorePair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case MIMG: {
MachineBasicBlock::iterator NewMI =
mergeImagePair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case TBUFFER_LOAD: {
MachineBasicBlock::iterator NewMI =
mergeTBufferLoadPair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
case TBUFFER_STORE: {
MachineBasicBlock::iterator NewMI =
mergeTBufferStorePair(CI, Paired, InstsToMove);
- CI.setMI(NewMI, *TII, *STM);
+ CI.setMI(NewMI, *this);
OptimizeListAgain |= (CI.Width + Paired.Width) < 4;
break;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 3168bcd53eda..e1018bdfde46 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -56,6 +56,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -90,6 +91,8 @@ private:
unsigned OrSaveExecOpc;
unsigned Exec;
+ bool EnableOptimizeEndCf = false;
+
bool hasKill(const MachineBasicBlock *Begin, const MachineBasicBlock *End);
void emitIf(MachineInstr &MI);
@@ -579,10 +582,10 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
void SILowerControlFlow::optimizeEndCf() {
// If the only instruction immediately following this END_CF is an another
// END_CF in the only successor we can avoid emitting exec mask restore here.
- if (!RemoveRedundantEndcf)
+ if (!EnableOptimizeEndCf)
return;
- for (MachineInstr *MI : LoweredEndCf) {
+ for (MachineInstr *MI : reverse(LoweredEndCf)) {
MachineBasicBlock &MBB = *MI->getParent();
auto Next =
skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
@@ -807,6 +810,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
+ EnableOptimizeEndCf =
+ RemoveRedundantEndcf && MF.getTarget().getOptLevel() > CodeGenOpt::None;
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 55196fe334e6..0fbdbef6fcce 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -127,7 +127,7 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
// FIXME: Just emit the readlane/writelane directly
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
- unsigned Reg = CI.getReg();
+ Register Reg = CI.getReg();
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, MVT::i32);
@@ -239,50 +239,6 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
return false;
}
-// Find lowest available VGPR and use it as VGPR reserved for SGPR spills.
-static bool lowerShiftReservedVGPR(MachineFunction &MF,
- const GCNSubtarget &ST) {
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
- // Early out if pre-reservation of a VGPR for SGPR spilling is disabled.
- if (!PreReservedVGPR)
- return false;
-
- // If there are no free lower VGPRs available, default to using the
- // pre-reserved register instead.
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- Register LowestAvailableVGPR =
- TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF);
- if (!LowestAvailableVGPR)
- LowestAvailableVGPR = PreReservedVGPR;
-
- MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- // Create a stack object for a possible spill in the function prologue.
- // Note Non-CSR VGPR also need this as we may overwrite inactive lanes.
- Optional<int> FI = FrameInfo.CreateSpillStackObject(4, Align(4));
-
- // Find saved info about the pre-reserved register.
- const auto *ReservedVGPRInfoItr =
- llvm::find_if(FuncInfo->getSGPRSpillVGPRs(),
- [PreReservedVGPR](const auto &SpillRegInfo) {
- return SpillRegInfo.VGPR == PreReservedVGPR;
- });
-
- assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end());
- auto Index =
- std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr);
-
- FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index);
-
- for (MachineBasicBlock &MBB : MF) {
- assert(LowestAvailableVGPR.isValid() && "Did not find an available VGPR");
- MBB.addLiveIn(LowestAvailableVGPR);
- MBB.sortUniqueLiveIns();
- }
-
- return true;
-}
-
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -304,11 +260,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
if (!MFI.hasStackObjects() && !HasCSRs) {
SaveBlocks.clear();
RestoreBlocks.clear();
- if (FuncInfo->VGPRReservedForSGPRSpill) {
- // Free the reserved VGPR for later possible use by frame lowering.
- FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
- MRI.freezeReservedRegs(MF);
- }
return false;
}
@@ -326,8 +277,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// This operates under the assumption that only other SGPR spills are users
// of the frame index.
- lowerShiftReservedVGPR(MF, ST);
-
// To track the spill frame indices handled in this pass.
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
@@ -375,8 +324,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
FuncInfo->removeDeadFrameIndices(MFI);
MadeChange = true;
- } else if (FuncInfo->VGPRReservedForSGPRSpill) {
- FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
}
SaveBlocks.clear();
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 3ce368ef4db9..cca8565c9ff9 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -118,10 +118,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
WorkItemIDX = true;
- if (!F.hasFnAttribute("amdgpu-no-workitem-id-y"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
+ ST.getMaxWorkitemID(F, 1) != 0)
WorkItemIDY = true;
- if (!F.hasFnAttribute("amdgpu-no-workitem-id-z"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
+ ST.getMaxWorkitemID(F, 2) != 0)
WorkItemIDZ = true;
if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
@@ -274,7 +276,6 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned WaveSize = ST.getWavefrontSize();
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
unsigned Size = FrameInfo.getObjectSize(FI);
unsigned NumLanes = Size / 4;
@@ -291,16 +292,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
Register LaneVGPR;
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
- // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
- // when one of the two conditions is true:
- // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
- // reserved.
- // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
- // required.
- if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
- assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
- LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
- } else if (VGPRIndex == 0) {
+ if (VGPRIndex == 0) {
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
if (LaneVGPR == AMDGPU::NoRegister) {
// We have no VGPRs left for spilling SGPRs. Reset because we will not
@@ -308,6 +300,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
SGPRToVGPRSpills.erase(FI);
NumVGPRSpillLanes -= I;
+ // FIXME: We can run out of free registers with split allocation if
+ // IPRA is enabled and a called function already uses every VGPR.
#if 0
DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
"VGPRs for SGPR spilling",
@@ -340,21 +334,6 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
return true;
}
-/// Reserve a VGPR for spilling of SGPRs
-bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-
- Register LaneVGPR = TRI->findUnusedRegister(
- MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
- if (LaneVGPR == Register())
- return false;
- SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
- FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
- return true;
-}
-
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR to vice versa.
/// Returns true if a \p FI can be eliminated completely.
@@ -616,24 +595,6 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
return false;
}
-// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
-bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
- MachineFunction &MF) {
- for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
- if (i->VGPR == ReservedVGPR) {
- SpillVGPRs.erase(i);
-
- for (MachineBasicBlock &MBB : MF) {
- MBB.removeLiveIn(ReservedVGPR);
- MBB.sortUniqueLiveIns();
- }
- this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
- return true;
- }
- }
- return false;
-}
-
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
if (UsesAGPRs)
return *UsesAGPRs;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 8accbf611c5f..8e821274bb77 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,7 +502,6 @@ public: // FIXME
Register SGPRForBPSaveRestoreCopy;
Optional<int> BasePointerSaveIndex;
- Register VGPRReservedForSGPRSpill;
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
public:
@@ -528,7 +527,6 @@ public:
void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
SpillVGPRs[Index].VGPR = NewVGPR;
SpillVGPRs[Index].FI = newFI;
- VGPRReservedForSGPRSpill = NewVGPR;
}
bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
@@ -556,7 +554,6 @@ public:
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
- bool reserveVGPRforSGPRSpills(MachineFunction &MF);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
void removeDeadFrameIndices(MachineFrameInfo &MFI);
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 69eab762f05c..24a8879b5684 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -188,7 +188,7 @@ void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
- BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
+ BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
.addImm(Value)
.addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
(Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
index 6bf6c45d8cf6..e13e33ed5457 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -155,6 +155,11 @@ public:
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
}
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
};
} // end anonymous namespace
@@ -366,47 +371,42 @@ void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
// Re-calculate the liveness of \p Reg in the THEN-region
void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
-
- SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming;
-
- MachineBasicBlock *ThenEntry = nullptr;
- for (auto *Succ : If->successors()) {
- if (Succ != Flow) {
- ThenEntry = Succ;
- break;
+ SetVector<MachineBasicBlock *> Blocks;
+ SmallVector<MachineBasicBlock *> WorkList({If});
+
+ // Collect all successors until we see the flow block, where we should
+ // reconverge.
+ while (!WorkList.empty()) {
+ auto *MBB = WorkList.pop_back_val();
+ for (auto *Succ : MBB->successors()) {
+ if (Succ != Flow && !Blocks.contains(Succ)) {
+ WorkList.push_back(Succ);
+ Blocks.insert(Succ);
+ }
}
}
- assert(ThenEntry && "No successor in Then region?");
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
- df_iterator_default_set<MachineBasicBlock *, 16> Visited;
-
- for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
- if (MBB == Flow)
- break;
-
+ for (MachineBasicBlock *MBB : Blocks) {
// Clear Live bit, as we will recalculate afterwards
LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
<< '\n');
OldVarInfo.AliveBlocks.reset(MBB->getNumber());
}
+ SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
+
// Get the blocks the Reg should be alive through
for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
++I) {
auto *UseMI = I->getParent();
if (UseMI->isPHI() && I->readsReg()) {
- if (Visited.contains(UseMI->getParent()))
+ if (Blocks.contains(UseMI->getParent()))
PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
}
}
- Visited.clear();
-
- for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
- if (MBB == Flow)
- break;
-
+ for (MachineBasicBlock *MBB : Blocks) {
SmallVector<MachineInstr *> Uses;
// PHI instructions has been processed before.
findNonPHIUsesInBlock(Reg, MBB, Uses);
@@ -433,7 +433,7 @@ void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
// Set the isKilled flag if we get new Kills in the THEN region.
for (auto *MI : OldVarInfo.Kills) {
- if (Visited.contains(MI->getParent()))
+ if (Blocks.contains(MI->getParent()))
MI->addRegisterKilled(Reg, TRI);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 340e2b48e5cd..eb9452f4b85e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -617,7 +617,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
let HasSGPR = 1;
}
-def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
+def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
(add PRIVATE_RSRC_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
@@ -784,7 +784,7 @@ multiclass SRegClass<int numRegs, int priority,
}
defm "" : SRegClass<3, 14, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
-defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64], SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
defm "" : SRegClass<5, 16, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, 17, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, 18, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
@@ -824,7 +824,7 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
(add VGPR_64)>;
defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
-defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64], (add VGPR_128)>;
+defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
@@ -846,7 +846,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
(add AGPR_64)>;
defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
-defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64], (add AGPR_128)>;
+defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 77ee3c0ff0e4..46efb3c605c6 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -861,12 +861,16 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
MachineInstr *VcmpMI;
const MachineOperand &Op0 = MI.getOperand(0);
const MachineOperand &Op1 = MI.getOperand(1);
+
+ // VCC represents lanes killed.
+ Register VCC = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
+
if (TRI->isVGPR(*MRI, Op0.getReg())) {
Opcode = AMDGPU::getVOPe32(Opcode);
VcmpMI = BuildMI(MBB, &MI, DL, TII->get(Opcode)).add(Op1).add(Op0);
} else {
VcmpMI = BuildMI(MBB, &MI, DL, TII->get(Opcode))
- .addReg(AMDGPU::VCC, RegState::Define)
+ .addReg(VCC, RegState::Define)
.addImm(0) // src0 modifiers
.add(Op1)
.addImm(0) // src1 modifiers
@@ -874,9 +878,6 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
.addImm(0); // omod
}
- // VCC represents lanes killed.
- Register VCC = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
-
MachineInstr *MaskUpdateMI =
BuildMI(MBB, MI, DL, TII->get(AndN2Opc), LiveMaskReg)
.addReg(LiveMaskReg)
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1713586dcf5b..3f7837f7dbf1 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -246,10 +246,10 @@ let Defs = [SCC] in {
def S_BCNT0_I32_B32 : SOP1_32 <"s_bcnt0_i32_b32">;
def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64">;
def S_BCNT1_I32_B32 : SOP1_32 <"s_bcnt1_i32_b32",
- [(set i32:$sdst, (ctpop i32:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<ctpop> i32:$src0))]
>;
def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64",
- [(set i32:$sdst, (ctpop i64:$src0))]
+ [(set i32:$sdst, (UniformUnaryFrag<ctpop> i64:$src0))]
>;
} // End Defs = [SCC]
@@ -518,10 +518,9 @@ let Uses = [SCC] in {
def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32",
[(set i32:$sdst, (SelectPat<select> i32:$src0, i32:$src1))]
>;
- def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64",
- [(set i64:$sdst, (SelectPat<select> i64:$src0, i64:$src1))]
- >;
}
+
+ def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
} // End Uses = [SCC]
let Defs = [SCC] in {
@@ -551,11 +550,11 @@ def S_XOR_B64 : SOP2_64 <"s_xor_b64",
>;
def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
- [(set i32:$sdst, (not (xor_oneuse i32:$src0, i32:$src1)))]
+ [(set i32:$sdst, (UniformUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1)))]
>;
def S_XNOR_B64 : SOP2_64 <"s_xnor_b64",
- [(set i64:$sdst, (not (xor_oneuse i64:$src0, i64:$src1)))]
+ [(set i64:$sdst, (UniformUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1)))]
>;
def S_NAND_B32 : SOP2_32 <"s_nand_b32",
@@ -1371,7 +1370,7 @@ def : GCNPat <
>;
def : GCNPat <
- (i64 (ctpop i64:$src)),
+ (i64 (UniformUnaryFrag<ctpop> i64:$src)),
(i64 (REG_SEQUENCE SReg_64,
(i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0,
(S_MOV_B32 (i32 0)), sub1))
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 0bee9022975e..18c348d1cf89 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -79,8 +79,8 @@ const char* const IdSymbolic[] = {
"HW_REG_FLAT_SCR_LO",
"HW_REG_FLAT_SCR_HI",
"HW_REG_XNACK_MASK",
- nullptr, // HW_ID1, no predictable values
- nullptr, // HW_ID2, no predictable values
+ "HW_REG_HW_ID1",
+ "HW_REG_HW_ID2",
"HW_REG_POPS_PACKER",
nullptr,
nullptr,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d20eaaaa65e8..1e96266eb06c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -132,6 +132,8 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGBiasMappingTable_IMPL
+#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
@@ -410,7 +412,7 @@ void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
}
std::string AMDGPUTargetID::toString() const {
- std::string StringRep = "";
+ std::string StringRep;
raw_string_ostream StreamRep(StringRep);
auto TargetTriple = STI.getTargetTriple();
@@ -421,7 +423,7 @@ std::string AMDGPUTargetID::toString() const {
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-';
- std::string Processor = "";
+ std::string Processor;
// TODO: Following else statement is present here because we used various
// alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
// Remove once all aliases are removed from GCNProcessors.td.
@@ -432,7 +434,7 @@ std::string AMDGPUTargetID::toString() const {
Twine(Version.Stepping))
.str();
- std::string Features = "";
+ std::string Features;
if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
switch (*HsaAbiVersion) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
@@ -1018,9 +1020,18 @@ static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
}
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
- return
- ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
- IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
+ switch (Id) {
+ case ID_HW_ID:
+ return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
+ case ID_HW_ID1:
+ case ID_HW_ID2:
+ return isGFX10Plus(STI);
+ case ID_XNACK_MASK:
+ return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
+ default:
+ return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+ IdSymbolic[Id];
+ }
}
bool isValidHwreg(int64_t Id) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 061c74c0ace6..89f928eb8b92 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -64,6 +64,7 @@ struct GcnBufferFormatInfo {
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
+#define GET_MIMGBiasMapping_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -330,6 +331,16 @@ struct MIMGMIPMappingInfo {
MIMGBaseOpcode NONMIP;
};
+struct MIMGBiasMappingInfo {
+ MIMGBaseOpcode Bias;
+ MIMGBaseOpcode NoBias;
+};
+
+struct MIMGOffsetMappingInfo {
+ MIMGBaseOpcode Offset;
+ MIMGBaseOpcode NoOffset;
+};
+
struct MIMGG16MappingInfo {
MIMGBaseOpcode G;
MIMGBaseOpcode G16;
@@ -342,6 +353,12 @@ LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
LLVM_READONLY
+const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
+
+LLVM_READONLY
+const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
+
+LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
LLVM_READONLY
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 8d232ffe4114..b9ff814a4dc5 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -637,9 +637,9 @@ class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
)
>;
-def : divergent_i64_BinOp <and, V_AND_B32_e32>;
-def : divergent_i64_BinOp <or, V_OR_B32_e32>;
-def : divergent_i64_BinOp <xor, V_XOR_B32_e32>;
+def : divergent_i64_BinOp <and, V_AND_B32_e64>;
+def : divergent_i64_BinOp <or, V_OR_B32_e64>;
+def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;
let SubtargetPredicate = Has16BitInsts in {
@@ -688,6 +688,36 @@ let SubtargetPredicate = HasDLInsts in {
let isReMaterializable = 1 in
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
+def : GCNPat<
+ (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
+ (i32 (V_XNOR_B32_e64 $src0, $src1))
+>;
+
+def : GCNPat<
+ (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
+ (i32 (V_XNOR_B32_e64 $src0, $src1))
+>;
+
+def : GCNPat<
+ (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
+ (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
+ (i32 (EXTRACT_SUBREG $src0, sub0)),
+ (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
+ (i32 (V_XNOR_B32_e64
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
+>;
+
+def : GCNPat<
+ (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
+ (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
+ (i32 (EXTRACT_SUBREG $src0, sub0)),
+ (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
+ (i32 (V_XNOR_B32_e64
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
+>;
+
let Constraints = "$vdst = $src2",
DisableEncoding = "$src2",
isConvertibleToThreeAddress = 1,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 32222b3eb93c..707475ceccee 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -388,6 +388,12 @@ class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC
let HasModifiers = 0;
let Asm64 = "$vdst, $src0, $src1, $src2$cbsz$abid$blgp";
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
+ // Dst and SrcC cannot partially overlap if SrcC/Dst is bigger than 4 VGPRs.
+ // We then create two versions of the instruction: with tied dst and src2
+  // and with the earlyclobber flag on the dst. This is stricter than the
+  // actual HW restriction. In particular, earlyclobber also affects src0 and
+  // src1 allocation, which is not required.
+ bit NoDstOverlap = !gt(DstVT.Size, 128);
}
def VOPProfileMAI_F32_F32_X4 : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, AISrc_128_f32, ADst_128>;
@@ -426,6 +432,11 @@ def VOPProfileMAI_F32_V4I16_X32_VCD : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F
def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64, VISrc_256_f64, VDst_256, AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64_VCD : VOPProfileMAI<VOP_F64_F64_F64_F64, VISrc_64_f64, VDst_64, AVSrc_64>;
+class MFMATable <bit is_mac, string Name> {
+ bit IsMac = is_mac;
+ string FMAOp = Name;
+}
+
let Predicates = [HasMAIInsts] in {
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
@@ -435,13 +446,31 @@ let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
} // End isMoveImm = 1
} // End isAsCheapAsAMove = 1, isReMaterializable = 1
-multiclass MAIInst<string OpName, string P, SDPatternOperator node> {
+multiclass MAIInst<string OpName, string P, SDPatternOperator node,
+ bit NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap> {
let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
- defm "" : VOP3Inst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), node>;
-
- let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
- defm _vgprcd : VOP3Inst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>;
+ let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
+ defm "" : VOP3Inst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), !if(NoDstOverlap, null_frag, node)>,
+ MFMATable<0, NAME # "_e64">;
+
+ let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
+ defm _vgprcd : VOP3Inst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>,
+ MFMATable<0, NAME # "_vgprcd_e64">;
+ }
+
+ foreach _ = BoolToList<NoDstOverlap>.ret in {
+ let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
+ isConvertibleToThreeAddress = NoDstOverlap,
+ Mnemonic = OpName in {
+ defm "_mac" : VOP3Inst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P), node>,
+ MFMATable<1, NAME # "_e64">;
+
+ let SubtargetPredicate = isGFX90APlus in
+ defm _mac_vgprcd : VOP3Inst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>,
+ MFMATable<1, NAME # "_vgprcd_e64">;
+ }
+ }
} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
}
@@ -517,6 +546,7 @@ multiclass VOP3P_Real_MAI<bits<7> op> {
}
}
+let Constraints = "" in {
multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
let SubtargetPredicate = isGFX90AOnly,
AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A" in {
@@ -536,6 +566,7 @@ multiclass VOP3P_Real_MFMA<bits<7> op> :
let DecoderNamespace = "GFX8";
}
}
+}
defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>;
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 1d5e45aec06c..979371bf7cf6 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -25,12 +25,9 @@ class ARMAsmPrinter;
class ARMBaseTargetMachine;
class ARMRegisterBankInfo;
class ARMSubtarget;
-struct BasicBlockInfo;
class Function;
class FunctionPass;
class InstructionSelector;
-class MachineBasicBlock;
-class MachineFunction;
class MachineInstr;
class MCInst;
class PassRegistry;
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 8173fe4036a8..4efbdbb2abc8 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -512,8 +512,7 @@ def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
- [HasV6T2Ops, FeaturePerfMon,
- FeatureV7Clrex]>;
+ [HasV6T2Ops, FeatureV7Clrex]>;
def HasV8MMainlineOps :
SubtargetFeature<"v8m.main", "HasV8MMainlineOps", "true",
@@ -522,7 +521,7 @@ def HasV8MMainlineOps :
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureAcquireRelease]>;
+ [HasV7Ops, FeaturePerfMon, FeatureAcquireRelease]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
@@ -553,6 +552,10 @@ def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true",
"Support ARM v8.7a instructions",
[HasV8_6aOps]>;
+def HasV8_8aOps : SubtargetFeature<"v8.8a", "HasV8_8aOps", "true",
+ "Support ARM v8.8a instructions",
+ [HasV8_7aOps]>;
+
def HasV9_0aOps : SubtargetFeature<"v9a", "HasV9_0aOps", "true",
"Support ARM v9a instructions",
[HasV8_5aOps]>;
@@ -565,6 +568,10 @@ def HasV9_2aOps : SubtargetFeature<"v9.2a", "HasV9_2aOps", "true",
"Support ARM v9.2a instructions",
[HasV8_7aOps, HasV9_1aOps]>;
+def HasV9_3aOps : SubtargetFeature<"v9.3a", "HasV9_3aOps", "true",
+ "Support ARM v9.3a instructions",
+ [HasV8_8aOps, HasV9_2aOps]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
@@ -757,7 +764,8 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
FeatureNEON,
FeatureDB,
FeatureDSP,
- FeatureAClass]>;
+ FeatureAClass,
+ FeaturePerfMon]>;
def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
FeatureNEON,
@@ -766,13 +774,15 @@ def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
FeatureTrustZone,
FeatureMP,
FeatureVirtualization,
- FeatureAClass]>;
+ FeatureAClass,
+ FeaturePerfMon]>;
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
FeatureHWDivThumb,
- FeatureRClass]>;
+ FeatureRClass,
+ FeaturePerfMon]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
@@ -894,6 +904,19 @@ def ARMv87a : Architecture<"armv8.7-a", "ARMv87a", [HasV8_7aOps,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
+def ARMv88a : Architecture<"armv8.8-a", "ARMv88a", [HasV8_8aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
def ARMv9a : Architecture<"armv9-a", "ARMv9a", [HasV9_0aOps,
FeatureAClass,
@@ -931,6 +954,19 @@ def ARMv92a : Architecture<"armv9.2-a", "ARMv92a", [HasV9_2aOps,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
+def ARMv93a : Architecture<"armv9.3-a", "ARMv93a", [HasV9_3aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
@@ -1425,8 +1461,7 @@ def : ProcNoItin<"neoverse-n1", [ARMv82a,
def : ProcNoItin<"neoverse-n2", [ARMv85a,
FeatureBF16,
- FeatureMatMulInt8,
- FeaturePerfMon]>;
+ FeatureMatMulInt8]>;
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 884f38ff6c58..cde715880376 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4868,6 +4868,36 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
}
+
+ // Check the address model by taking the first Imm operand and checking it is
+ // legal for that addressing mode.
+ ARMII::AddrMode AddrMode =
+ (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ default:
+ break;
+ case ARMII::AddrModeT2_i7:
+ case ARMII::AddrModeT2_i7s2:
+ case ARMII::AddrModeT2_i7s4:
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i8pos:
+ case ARMII::AddrModeT2_i8neg:
+ case ARMII::AddrModeT2_i8s4:
+ case ARMII::AddrModeT2_i12: {
+ uint32_t Imm = 0;
+ for (auto Op : MI.operands()) {
+ if (Op.isImm()) {
+ Imm = Op.getImm();
+ break;
+ }
+ }
+ if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
+ ErrInfo = "Incorrect AddrMode Imm for instruction";
+ return false;
+ }
+ break;
+ }
+ }
return true;
}
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 81ec4d09a408..b15ef094d9d2 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -534,7 +534,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP)
.addImm(ArgAssigner.StackOffset)
- .addImm(0)
+ .addImm(-1ULL)
.add(predOps(ARMCC::AL));
return true;
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.h b/llvm/lib/Target/ARM/ARMCallLowering.h
index 87b18f811747..38095617fb4f 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -23,7 +23,6 @@
namespace llvm {
class ARMTargetLowering;
-class MachineFunction;
class MachineInstrBuilder;
class MachineIRBuilder;
class Value;
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fa244786a80d..2f083561bbd4 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1144,7 +1144,7 @@ static bool determineFPRegsToClear(const MachineInstr &MI,
if (!Op.isReg())
continue;
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
if (Op.isDef()) {
if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
(Reg >= ARM::D0 && Reg <= ARM::D15) ||
@@ -1356,7 +1356,7 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
std::vector<unsigned> NonclearedFPRegs;
for (const MachineOperand &Op : MBBI->operands()) {
if (Op.isReg() && Op.isUse()) {
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
assert(!ARM::DPRRegClass.contains(Reg) ||
ARM::DPR_VFP2RegClass.contains(Reg));
assert(!ARM::QPRRegClass.contains(Reg));
@@ -1451,9 +1451,9 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
// restore FPSCR from stack and clear bits 0-4, 7, 28-31
// The other bits are program global according to the AAPCS
if (passesFPReg) {
- BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tLDRspi), SpareReg)
.addReg(ARM::SP)
- .addImm(0x40)
+ .addImm(0x10)
.add(predOps(ARMCC::AL));
BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
.addReg(SpareReg)
@@ -1543,7 +1543,7 @@ void ARMExpandPseudo::CMSERestoreFPRegsV8(
std::vector<unsigned> NonclearedFPRegs;
for (const MachineOperand &Op : MBBI->operands()) {
if (Op.isReg() && Op.isDef()) {
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
assert(!ARM::DPRRegClass.contains(Reg) ||
ARM::DPR_VFP2RegClass.contains(Reg));
assert(!ARM::QPRRegClass.contains(Reg));
@@ -1663,7 +1663,7 @@ static bool definesOrUsesFPReg(const MachineInstr &MI) {
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg())
continue;
- unsigned Reg = Op.getReg();
+ Register Reg = Op.getReg();
if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
(Reg >= ARM::D0 && Reg <= ARM::D15) ||
(Reg >= ARM::S0 && Reg <= ARM::S31))
@@ -2201,7 +2201,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case ARM::tBLXNS_CALL: {
DebugLoc DL = MBBI->getDebugLoc();
- unsigned JumpReg = MBBI->getOperand(0).getReg();
+ Register JumpReg = MBBI->getOperand(0).getReg();
// Figure out which registers are live at the point immediately before the
// call. When we indiscriminately push a set of registers, the live
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 28a076edd6dc..5d94b99d4c5d 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -319,7 +319,7 @@ unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, unsigned Op1) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
// Make sure the input operands are sufficiently constrained to be legal
@@ -346,7 +346,7 @@ unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
// Make sure the input operand is sufficiently constrained to be legal
@@ -371,7 +371,7 @@ unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1) {
@@ -392,7 +392,7 @@ unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::f64) return 0;
- unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VMOVSR), MoveReg)
.addReg(SrcReg));
@@ -402,7 +402,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::i64) return 0;
- unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VMOVRS), MoveReg)
.addReg(SrcReg));
@@ -428,7 +428,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
Imm = ARM_AM::getFP32Imm(Val);
Opc = ARM::FCONSTS;
}
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ Register DestReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), DestReg).addImm(Imm));
return DestReg;
@@ -440,7 +440,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
// MachineConstantPool wants an explicit alignment.
Align Alignment = DL.getPrefTypeAlign(CFP->getType());
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ Register DestReg = createResultReg(TLI.getRegClassFor(VT));
unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
// The extra reg is for addrmode5.
@@ -462,7 +462,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
&ARM::GPRRegClass;
- unsigned ImmReg = createResultReg(RC);
+ Register ImmReg = createResultReg(RC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ImmReg)
.addImm(CI->getZExtValue()));
@@ -478,7 +478,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
&ARM::GPRRegClass;
- unsigned ImmReg = createResultReg(RC);
+ Register ImmReg = createResultReg(RC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ImmReg)
.addImm(Imm));
@@ -531,7 +531,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
: &ARM::GPRRegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
// FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
@@ -589,7 +589,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
if (IsPositionIndependent) {
unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
- unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DbgLoc, TII.get(Opc), NewDestReg)
@@ -605,7 +605,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
(Subtarget->isTargetMachO() && IsIndirect) ||
Subtarget->genLongCalls()) {
MachineInstrBuilder MIB;
- unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
if (isThumb2)
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::t2LDRi12), NewDestReg)
@@ -657,7 +657,7 @@ unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -832,7 +832,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
: &ARM::GPRRegClass;
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg)
@@ -991,7 +991,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
// If we had an unaligned load of a float we've converted it to an regular
// load. Now we must move from the GRP to the FP register.
if (needVMOV) {
- unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VMOVSR), MoveReg)
.addReg(ResultReg));
@@ -1044,7 +1044,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
+ Register Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
: &ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
@@ -1095,7 +1095,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
if (!Subtarget->hasVFP2Base()) return false;
// Unaligned stores need special handling. Floats require word-alignment.
if (Alignment && Alignment < 4) {
- unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VMOVRS), MoveReg)
.addReg(SrcReg));
@@ -1257,7 +1257,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
- unsigned OpReg = getRegForValue(TI->getOperand(0));
+ Register OpReg = getRegForValue(TI->getOperand(0));
OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TstOpc))
@@ -1284,7 +1284,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
return true;
}
- unsigned CmpReg = getRegForValue(BI->getCondition());
+ Register CmpReg = getRegForValue(BI->getCondition());
if (CmpReg == 0) return false;
// We've been divorced from our compare! Our block was split, and
@@ -1315,7 +1315,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
}
bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
- unsigned AddrReg = getRegForValue(I->getOperand(0));
+ Register AddrReg = getRegForValue(I->getOperand(0));
if (AddrReg == 0) return false;
unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
@@ -1406,7 +1406,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
break;
}
- unsigned SrcReg1 = getRegForValue(Src1Value);
+ Register SrcReg1 = getRegForValue(Src1Value);
if (SrcReg1 == 0) return false;
unsigned SrcReg2 = 0;
@@ -1468,7 +1468,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
: &ARM::GPRRegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = fastMaterializeConstant(Zero);
// ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
@@ -1488,10 +1488,10 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
if (!I->getType()->isDoubleTy() ||
!V->getType()->isFloatTy()) return false;
- unsigned Op = getRegForValue(V);
+ Register Op = getRegForValue(V);
if (Op == 0) return false;
- unsigned Result = createResultReg(&ARM::DPRRegClass);
+ Register Result = createResultReg(&ARM::DPRRegClass);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VCVTDS), Result)
.addReg(Op));
@@ -1507,10 +1507,10 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
if (!(I->getType()->isFloatTy() &&
V->getType()->isDoubleTy())) return false;
- unsigned Op = getRegForValue(V);
+ Register Op = getRegForValue(V);
if (Op == 0) return false;
- unsigned Result = createResultReg(&ARM::SPRRegClass);
+ Register Result = createResultReg(&ARM::SPRRegClass);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::VCVTSD), Result)
.addReg(Op));
@@ -1535,7 +1535,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (SrcReg == 0) return false;
// Handle sign-extension.
@@ -1556,7 +1556,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg).addReg(FP));
updateValueMap(I, ResultReg);
@@ -1572,7 +1572,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
if (!isTypeLegal(RetTy, DstVT))
return false;
- unsigned Op = getRegForValue(I->getOperand(0));
+ Register Op = getRegForValue(I->getOperand(0));
if (Op == 0) return false;
unsigned Opc;
@@ -1583,7 +1583,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
else return false;
// f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg).addReg(Op));
@@ -1604,9 +1604,9 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
// Things need to be register sized for register moves.
if (VT != MVT::i32) return false;
- unsigned CondReg = getRegForValue(I->getOperand(0));
+ Register CondReg = getRegForValue(I->getOperand(0));
if (CondReg == 0) return false;
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ Register Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
// Check to see if we can use an immediate in the conditional move.
@@ -1649,7 +1649,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
else
MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
}
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
if (!UseImm) {
Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
@@ -1752,15 +1752,15 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
break;
}
- unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ Register SrcReg1 = getRegForValue(I->getOperand(0));
if (SrcReg1 == 0) return false;
// TODO: Often the 2nd operand is an immediate, which can be encoded directly
// in the instruction, rather than materializing the value in a register.
- unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ Register SrcReg2 = getRegForValue(I->getOperand(1));
if (SrcReg2 == 0) return false;
- unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+ Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1803,13 +1803,13 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
Opc = is64bit ? ARM::VMULD : ARM::VMULS;
break;
}
- unsigned Op1 = getRegForValue(I->getOperand(0));
+ Register Op1 = getRegForValue(I->getOperand(0));
if (Op1 == 0) return false;
- unsigned Op2 = getRegForValue(I->getOperand(1));
+ Register Op2 = getRegForValue(I->getOperand(1));
if (Op2 == 0) return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg)
.addReg(Op1).addReg(Op2));
@@ -2022,7 +2022,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AdjStackUp))
- .addImm(NumBytes).addImm(0));
+ .addImm(NumBytes).addImm(-1ULL));
// Now the return value.
if (RetVT != MVT::isVoid) {
@@ -2101,7 +2101,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
F.isVarArg()));
const Value *RV = Ret->getOperand(0);
- unsigned Reg = getRegForValue(RV);
+ Register Reg = getRegForValue(RV);
if (Reg == 0)
return false;
@@ -2226,7 +2226,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
ArgVTs.reserve(I->getNumOperands());
ArgFlags.reserve(I->getNumOperands());
for (Value *Op : I->operands()) {
- unsigned Arg = getRegForValue(Op);
+ Register Arg = getRegForValue(Op);
if (Arg == 0) return false;
Type *ArgTy = Op->getType();
@@ -2588,7 +2588,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
- unsigned SrcReg = getRegForValue(Op);
+ Register SrcReg = getRegForValue(Op);
if (!SrcReg) return false;
// Because the high bits are undefined, a truncate doesn't generate
@@ -2744,7 +2744,7 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
Type *SrcTy = Src->getType();
bool isZExt = isa<ZExtInst>(I);
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
@@ -2788,7 +2788,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
}
Value *Src1Value = I->getOperand(0);
- unsigned Reg1 = getRegForValue(Src1Value);
+ Register Reg1 = getRegForValue(Src1Value);
if (Reg1 == 0) return false;
unsigned Reg2 = 0;
@@ -2797,7 +2797,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
if (Reg2 == 0) return false;
}
- unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+ Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
if(ResultReg == 0) return false;
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2975,7 +2975,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
MIB.add(predOps(ARMCC::AL));
// Fix the address by adding pc.
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ Register DestReg = createResultReg(TLI.getRegClassFor(VT));
Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
: ARM::PICADD;
DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
@@ -2987,7 +2987,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
MIB.add(predOps(ARMCC::AL));
if (UseGOT_PREL && Subtarget->isThumb()) {
- unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(ARM::t2LDRi12), NewDestReg)
.addReg(DestReg)
@@ -3057,11 +3057,11 @@ bool ARMFastISel::fastLowerArguments() {
for (const Argument &Arg : F->args()) {
unsigned ArgNo = Arg.getArgNo();
unsigned SrcReg = GPRArgRegs[ArgNo];
- unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
// Without this, EmitLiveInCopies may eliminate the livein if its only
// use is a bitcast (which isn't turned into an instruction).
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 4b59f9cb94ce..1f2f6f7497e0 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -516,7 +516,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Determine spill area sizes.
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
@@ -751,7 +751,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
int CFIIndex;
for (const auto &Entry : CSI) {
- unsigned Reg = Entry.getReg();
+ Register Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
switch (Reg) {
case ARM::R8:
@@ -784,7 +784,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if (GPRCS2Size > 0) {
MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
for (const auto &Entry : CSI) {
- unsigned Reg = Entry.getReg();
+ Register Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
switch (Reg) {
case ARM::R8:
@@ -794,7 +794,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R12:
if (STI.splitFramePushPop(MF)) {
unsigned DwarfReg = MRI->getDwarfRegNum(
- Reg == ARM::R12 ? (unsigned)ARM::RA_AUTH_CODE : Reg, true);
+ Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
unsigned Offset = MFI.getObjectOffset(FI);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
@@ -812,7 +812,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// instructions in the prologue.
MachineBasicBlock::iterator Pos = std::next(LastPush);
for (const auto &Entry : CSI) {
- unsigned Reg = Entry.getReg();
+ Register Reg = Entry.getReg();
int FI = Entry.getFrameIdx();
if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
(Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
@@ -1144,7 +1144,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
while (i != 0) {
unsigned LastReg = 0;
for (; i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
+ Register Reg = CSI[i-1].getReg();
if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
@@ -1237,7 +1237,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
CalleeSavedInfo &Info = CSI[i-1];
- unsigned Reg = Info.getReg();
+ Register Reg = Info.getReg();
if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
// The aligned reloads from area DPRCS2 are not inserted here.
@@ -1812,7 +1812,7 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
// shrinkwrapping can cause clobbering of r12 when the PAC code is
// generated. A follow-up patch will fix this in a more performant manner.
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
- false /*SpillsLR */))
+ true /* SpillsLR */))
return false;
return true;
@@ -2353,7 +2353,7 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
CSI.insert(find_if(CSI,
[=](const auto &CS) {
- unsigned Reg = CS.getReg();
+ Register Reg = CS.getReg();
return Reg == ARM::R10 || Reg == ARM::R11 ||
Reg == ARM::R8 || Reg == ARM::R9 ||
ARM::DPRRegClass.contains(Reg);
diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
index f083fa6662e9..0d201a67af46 100644
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -164,7 +164,7 @@ static bool getBaseOffset(const MachineInstr &MI, const MachineOperand *&BaseOp,
ARMBankConflictHazardRecognizer::ARMBankConflictHazardRecognizer(
const ScheduleDAG *DAG, int64_t CPUBankMask, bool CPUAssumeITCMConflict)
- : ScheduleHazardRecognizer(), MF(DAG->MF), DL(DAG->MF.getDataLayout()),
+ : MF(DAG->MF), DL(DAG->MF.getDataLayout()),
DataMask(DataBankMask.getNumOccurrences() ? int64_t(DataBankMask)
: CPUBankMask),
AssumeITCMBankConflict(AssumeITCMConflict.getNumOccurrences()
diff --git a/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/llvm/lib/Target/ARM/ARMHazardRecognizer.h
index c1f1bcd0a629..66a1477e5e08 100644
--- a/llvm/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -34,7 +34,7 @@ class ARMHazardRecognizerFPMLx : public ScheduleHazardRecognizer {
unsigned FpMLxStalls = 0;
public:
- ARMHazardRecognizerFPMLx() : ScheduleHazardRecognizer() { MaxLookAhead = 1; }
+ ARMHazardRecognizerFPMLx() { MaxLookAhead = 1; }
HazardType getHazardType(SUnit *SU, int Stalls) override;
void Reset() override;
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bb2859c766c2..98c8133282a2 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3227,7 +3227,7 @@ bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
if (!ImmAPF.getExactInverse(&ToConvert))
return false;
}
- APSInt Converted(64, 0);
+ APSInt Converted(64, false);
bool IsExact;
ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
&IsExact);
@@ -5737,8 +5737,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
// them into a GPRPair.
SDLoc dl(N);
- SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
- : SDValue(nullptr,0);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
SmallVector<bool, 8> OpChanged;
// Glue node will be appended late.
@@ -5801,8 +5800,8 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
SDValue V0 = N->getOperand(i+1);
SDValue V1 = N->getOperand(i+2);
- unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
- unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
SDValue PairedReg;
MachineRegisterInfo &MRI = MF->getRegInfo();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3d45db349644..fe4e6b24367a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2899,7 +2899,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
unsigned Bytes = Arg.getValueSizeInBits() / 8;
int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
- unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!Register::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
@@ -4018,7 +4018,7 @@ SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
// Mark LR an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
SDValue ReturnAddress =
DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
@@ -4272,7 +4272,7 @@ SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
RC = &ARM::GPRRegClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -4342,7 +4342,7 @@ int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
- unsigned VReg = MF.addLiveIn(Reg, RC);
+ Register VReg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(OrigArg, 4 * i));
@@ -4527,7 +4527,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this value is passed in r0 and has the returned attribute (e.g.
@@ -6065,7 +6065,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
@@ -14682,7 +14682,9 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Check that N is CMPZ(CSINC(0, 0, CC, X)), return X if valid.
+// Check that N is CMPZ(CSINC(0, 0, CC, X)),
+// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
+// return X if valid.
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
return SDValue();
@@ -14696,12 +14698,24 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
CSInc = CSInc.getOperand(0);
- if (CSInc.getOpcode() != ARMISD::CSINC ||
- !isNullConstant(CSInc.getOperand(0)) ||
- !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse())
- return SDValue();
- CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
- return CSInc.getOperand(3);
+ if (CSInc.getOpcode() == ARMISD::CSINC &&
+ isNullConstant(CSInc.getOperand(0)) &&
+ isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
+ CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
+ return CSInc.getOperand(3);
+ }
+ if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
+ isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
+ CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
+ return CSInc.getOperand(4);
+ }
+ if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
+ isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
+ CC = ARMCC::getOppositeCondition(
+ (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
+ return CSInc.getOperand(4);
+ }
+ return SDValue();
}
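Aside (not part of the diff): a minimal standalone sketch of why the outer CMPZ is redundant in the patterns named in the comment above. CSINC(0, 0, CC, X) and CMOV(1, 0, CC, $cpsr, X) both materialise a 0/1 value from a condition, so comparing that value against zero only re-derives the condition (possibly inverted). The names below are illustrative and are not LLVM APIs.

    // Illustrative only: comparing a materialised 0/1 condition against zero
    // is redundant; it just yields the condition (or its inverse) again.
    #include <cassert>
    int main() {
      for (int c = 0; c <= 1; ++c) {
        bool cc = (c != 0);
        int v = cc ? 1 : 0;     // what CSINC(0, 0, CC, X) / CMOV(1, 0, CC, ...) produce
        bool isZero = (v == 0); // what CMPZ(v, 0) tests
        assert(isZero == !cc);  // the outer compare re-derives (inverted) CC
      }
      return 0;
    }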
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
@@ -15412,13 +15426,13 @@ static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
return SDValue();
SDLoc DL(Trunc);
- if (isVMOVNTruncMask(N->getMask(), VT, 0))
+ if (isVMOVNTruncMask(N->getMask(), VT, false))
return DAG.getNode(
ARMISD::VMOVN, DL, VT,
DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
DAG.getConstant(1, DL, MVT::i32));
- else if (isVMOVNTruncMask(N->getMask(), VT, 1))
+ else if (isVMOVNTruncMask(N->getMask(), VT, true))
return DAG.getNode(
ARMISD::VMOVN, DL, VT,
DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
@@ -18218,13 +18232,13 @@ SDValue ARMTargetLowering::PerformMVETruncCombine(
SmallVector<int, 8> Mask(S0->getMask().begin(), S0->getMask().end());
Mask.append(S1->getMask().begin(), S1->getMask().end());
- if (isVMOVNTruncMask(Mask, VT, 0))
+ if (isVMOVNTruncMask(Mask, VT, false))
return DAG.getNode(
ARMISD::VMOVN, DL, VT,
DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
DAG.getConstant(1, DL, MVT::i32));
- if (isVMOVNTruncMask(Mask, VT, 1))
+ if (isVMOVNTruncMask(Mask, VT, true))
return DAG.getNode(
ARMISD::VMOVN, DL, VT,
DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
@@ -20775,10 +20789,10 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
@@ -20787,10 +20801,10 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
- Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+ Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 5dee5e04af81..00db13f2eb52 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -28,8 +28,7 @@
#include "llvm/MC/MCInst.h"
using namespace llvm;
-ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI() {}
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI) {}
/// Return the noop instruction to use for a noop.
MCInst ARMInstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index aaf3280ea150..357aa6d062e9 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4526,64 +4526,48 @@ let Predicates = [HasNEON, HasV8_1a] in {
defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
null_frag>;
- def : Pat<(v4i16 (saddsat
- (v4i16 DPR:$src1),
- (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
- (v4i16 DPR:$Vm))))),
+ def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))),
(v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v2i32 (saddsat
- (v2i32 DPR:$src1),
- (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
- (v2i32 DPR:$Vm))))),
+ def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))),
(v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v8i16 (saddsat
- (v8i16 QPR:$src1),
- (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
- (v8i16 QPR:$Vm))))),
+ def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))),
(v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
- def : Pat<(v4i32 (saddsat
- (v4i32 QPR:$src1),
- (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
- (v4i32 QPR:$Vm))))),
+ def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))),
(v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
null_frag>;
- def : Pat<(v4i16 (saddsat
- (v4i16 DPR:$src1),
- (v4i16 (int_arm_neon_vqrdmulh
+ def : Pat<(v4i16 (int_arm_neon_vqrdmlah (v4i16 DPR:$src1),
(v4i16 DPR:$Vn),
(v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
- imm:$lane)))))),
+ imm:$lane)))),
(v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
imm:$lane))>;
- def : Pat<(v2i32 (saddsat
- (v2i32 DPR:$src1),
- (v2i32 (int_arm_neon_vqrdmulh
+ def : Pat<(v2i32 (int_arm_neon_vqrdmlah (v2i32 DPR:$src1),
(v2i32 DPR:$Vn),
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
- imm:$lane)))))),
+ imm:$lane)))),
(v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
- def : Pat<(v8i16 (saddsat
- (v8i16 QPR:$src1),
- (v8i16 (int_arm_neon_vqrdmulh
+ def : Pat<(v8i16 (int_arm_neon_vqrdmlah (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
(v8i16 (ARMvduplane (v8i16 QPR:$src3),
- imm:$lane)))))),
+ imm:$lane)))),
(v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
- def : Pat<(v4i32 (saddsat
- (v4i32 QPR:$src1),
- (v4i32 (int_arm_neon_vqrdmulh
+ def : Pat<(v4i32 (int_arm_neon_vqrdmlah (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v4i32 (ARMvduplane (v4i32 QPR:$src3),
- imm:$lane)))))),
+ imm:$lane)))),
(v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG
@@ -4596,63 +4580,47 @@ let Predicates = [HasNEON, HasV8_1a] in {
defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
null_frag>;
- def : Pat<(v4i16 (ssubsat
- (v4i16 DPR:$src1),
- (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
- (v4i16 DPR:$Vm))))),
+ def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1), (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))),
(v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v2i32 (ssubsat
- (v2i32 DPR:$src1),
- (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
- (v2i32 DPR:$Vm))))),
+ def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1), (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))),
(v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
- def : Pat<(v8i16 (ssubsat
- (v8i16 QPR:$src1),
- (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
- (v8i16 QPR:$Vm))))),
+ def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1), (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))),
(v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
- def : Pat<(v4i32 (ssubsat
- (v4i32 QPR:$src1),
- (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
- (v4i32 QPR:$Vm))))),
+ def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1), (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))),
(v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
null_frag>;
- def : Pat<(v4i16 (ssubsat
- (v4i16 DPR:$src1),
- (v4i16 (int_arm_neon_vqrdmulh
+ def : Pat<(v4i16 (int_arm_neon_vqrdmlsh (v4i16 DPR:$src1),
(v4i16 DPR:$Vn),
(v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
- imm:$lane)))))),
+ imm:$lane)))),
(v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
- def : Pat<(v2i32 (ssubsat
- (v2i32 DPR:$src1),
- (v2i32 (int_arm_neon_vqrdmulh
+ def : Pat<(v2i32 (int_arm_neon_vqrdmlsh (v2i32 DPR:$src1),
(v2i32 DPR:$Vn),
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
- imm:$lane)))))),
+ imm:$lane)))),
(v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
- def : Pat<(v8i16 (ssubsat
- (v8i16 QPR:$src1),
- (v8i16 (int_arm_neon_vqrdmulh
+ def : Pat<(v8i16 (int_arm_neon_vqrdmlsh (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
(v8i16 (ARMvduplane (v8i16 QPR:$src3),
- imm:$lane)))))),
+ imm:$lane)))),
(v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG
QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
- def : Pat<(v4i32 (ssubsat
- (v4i32 QPR:$src1),
- (v4i32 (int_arm_neon_vqrdmulh
+ def : Pat<(v4i32 (int_arm_neon_vqrdmlsh (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v4i32 (ARMvduplane (v4i32 QPR:$src3),
- imm:$lane)))))),
+ imm:$lane)))),
(v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 8be4e3f160e3..188b5562cac9 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -171,8 +171,8 @@ createARMInstructionSelector(const ARMBaseTargetMachine &TM,
ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM,
const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI)
- : InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), Opcodes(STI),
+ : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI),
+ STI(STI), Opcodes(STI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
index 6649750bb388..ff4647dd46fd 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -15,4 +15,4 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {}
+ARMRegisterInfo::ARMRegisterInfo() {}
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.h b/llvm/lib/Target/ARM/ARMRegisterInfo.h
index 87c0f322d3b3..2971b765a6fc 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -17,8 +17,6 @@
namespace llvm {
-class ARMSubtarget;
-
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
public:
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 36c4bbaafcbf..2dd25234dc50 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -15,7 +15,6 @@
#include "ARMCallLowering.h"
#include "ARMLegalizerInfo.h"
#include "ARMRegisterBankInfo.h"
-#include "ARMSubtarget.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
#include "ARMSubtarget.h"
@@ -35,6 +34,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index e61b90af31b0..1c2b7ee6ba35 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -121,6 +121,7 @@ protected:
ARMv85a,
ARMv86a,
ARMv87a,
+ ARMv88a,
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
@@ -129,6 +130,7 @@ protected:
ARMv9a,
ARMv91a,
ARMv92a,
+ ARMv93a,
};
public:
@@ -174,10 +176,12 @@ protected:
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
+ bool HasV8_8aOps = false;
bool HasV8_7aOps = false;
bool HasV9_0aOps = false;
bool HasV9_1aOps = false;
bool HasV9_2aOps = false;
+ bool HasV9_3aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
@@ -635,9 +639,11 @@ public:
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_6aOps() const { return HasV8_6aOps; }
bool hasV8_7aOps() const { return HasV8_7aOps; }
+ bool hasV8_8aOps() const { return HasV8_8aOps; }
bool hasV9_0aOps() const { return HasV9_0aOps; }
bool hasV9_1aOps() const { return HasV9_1aOps; }
bool hasV9_2aOps() const { return HasV9_2aOps; }
+ bool hasV9_3aOps() const { return HasV9_3aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 0b314ac2a41e..c38970f8e341 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -43,6 +43,7 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index 8c5438f7093b..936cae17f004 100644
--- a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -54,9 +54,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
}
}
-const MCRegister ARMElfTargetObjectFile::getStaticBase() const {
- return ARM::R9;
-}
+MCRegister ARMElfTargetObjectFile::getStaticBase() const { return ARM::R9; }
const MCExpr *ARMElfTargetObjectFile::
getIndirectSymViaRWPI(const MCSymbol *Sym) const {
diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/llvm/lib/Target/ARM/ARMTargetObjectFile.h
index 8b13198fe144..47334b9a8a45 100644
--- a/llvm/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -17,14 +17,13 @@ namespace llvm {
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
public:
- ARMElfTargetObjectFile()
- : TargetLoweringObjectFileELF() {
+ ARMElfTargetObjectFile() {
PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31;
}
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- const MCRegister getStaticBase() const override;
+ MCRegister getStaticBase() const override;
const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const override;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 602c6745d310..e0750a9945d2 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1116,18 +1116,6 @@ bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) {
if (!EnableMaskedGatherScatters || !ST->hasMVEIntegerOps())
return false;
- // This method is called in 2 places:
- // - from the vectorizer with a scalar type, in which case we need to get
- // this as good as we can with the limited info we have (and rely on the cost
- // model for the rest).
- // - from the masked intrinsic lowering pass with the actual vector type.
- // For MVE, we have a custom lowering pass that will already have custom
- // legalised any gathers that we can to MVE intrinsics, and want to expand all
- // the rest. The pass runs before the masked intrinsic lowering pass, so if we
- // are here, we know we want to expand.
- if (isa<VectorType>(Ty))
- return false;
-
unsigned EltWidth = Ty->getScalarSizeInBits();
return ((EltWidth == 32 && Alignment >= 4) ||
(EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index a56886d4fc11..5bb84899e5ef 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -189,6 +189,18 @@ public:
return isLegalMaskedLoad(DataTy, Alignment);
}
+ bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
+ // For MVE, we have a custom lowering pass that will already have custom
+ // legalised any gathers that we can lower to MVE intrinsics, and want to
+ // expand all the rest. The pass runs before the masked intrinsic lowering
+ // pass.
+ return true;
+ }
+
+ bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
+ return forceScalarizeMaskedGather(VTy, Alignment);
+ }
+
bool isLegalMaskedGather(Type *Ty, Align Alignment);
bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bfe078b06861..c7734cc2cf11 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -921,7 +921,7 @@ class ARMOperand : public MCParsedAsmOperand {
};
public:
- ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ ARMOperand(KindTy K) : Kind(K) {}
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const override { return StartLoc; }
@@ -1870,7 +1870,7 @@ public:
}
template <int shift> bool isMemRegRQOffset() const {
- if (!isMVEMem() || Memory.OffsetImm != 0 || Memory.Alignment != 0)
+ if (!isMVEMem() || Memory.OffsetImm != nullptr || Memory.Alignment != 0)
return false;
if (!ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains(
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 851acea94022..23430dfc017a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -1049,11 +1049,11 @@ void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
unsigned Kind = Fixup.getKind();
if (Kind >= FirstLiteralRelocationKind)
return;
- unsigned NumBytes = getFixupKindNumBytes(Kind);
MCContext &Ctx = Asm.getContext();
Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx, STI);
if (!Value)
return; // Doesn't change encoding.
+ const unsigned NumBytes = getFixupKindNumBytes(Kind);
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
@@ -1123,9 +1123,8 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
DenseMap<unsigned, int> RegOffsets;
int FloatRegCount = 0;
// Process each .cfi directive and build up compact unwind info.
- for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
+ for (const MCCFIInstruction &Inst : Instrs) {
unsigned Reg;
- const MCCFIInstruction &Inst = Instrs[i];
switch (Inst.getOperation()) {
case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa
CFARegisterOffset = Inst.getOffset();
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 37d81e4b0af1..df8f54d14a86 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -87,7 +87,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
if (IsPCRel) {
switch (Fixup.getTargetKind()) {
default:
- Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
@@ -159,7 +159,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
switch (Kind) {
default:
- Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index e060e59e3759..16bc0ca179a7 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -264,10 +264,8 @@ void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) {
void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
const SmallVectorImpl<uint8_t> &Opcodes) {
OS << "\t.unwind_raw " << Offset;
- for (SmallVectorImpl<uint8_t>::const_iterator OCI = Opcodes.begin(),
- OCE = Opcodes.end();
- OCI != OCE; ++OCI)
- OS << ", 0x" << Twine::utohexstr(*OCI);
+ for (uint8_t Opcode : Opcodes)
+ OS << ", 0x" << Twine::utohexstr(Opcode);
OS << '\n';
}
@@ -788,6 +786,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV9A:
case ARM::ArchKind::ARMV9_1A:
case ARM::ArchKind::ARMV9_2A:
+ case ARM::ArchKind::ARMV9_3A:
S.setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
S.setAttributeItem(ARM_ISA_use, Allowed, false);
S.setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 05e5a473a3c6..17ca1866cf95 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -338,8 +338,8 @@ void ARM_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
{codeview::RegisterId::ARM_NQ14, ARM::Q14},
{codeview::RegisterId::ARM_NQ15, ARM::Q15},
};
- for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
- MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
+ for (const auto &I : RegMap)
+ MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg));
}
static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) {
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 7ccdc6f85500..5c8f9bfdca08 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -36,8 +36,6 @@ class MCTargetStreamer;
class StringRef;
class Target;
class Triple;
-class raw_ostream;
-class raw_pwrite_stream;
namespace ARM_MC {
std::string ParseARMTriple(const Triple &TT, StringRef CPU);
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 54e80a095dd4..71a82a1e3271 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -167,7 +167,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
DebugLoc dl;
Register FramePtr = RegInfo->getFrameRegister(MF);
- unsigned BasePtr = RegInfo->getBaseRegister();
+ Register BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
// Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
@@ -206,7 +206,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
@@ -267,7 +267,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
}
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
@@ -348,7 +348,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Emit call frame information for the callee-saved high registers.
for (auto &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
@@ -376,7 +376,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// at this point in the prologue, so pick one.
unsigned ScratchRegister = ARM::NoRegister;
for (auto &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
ScratchRegister = Reg;
break;
@@ -531,7 +531,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned ScratchRegister = ARM::NoRegister;
bool HasFP = hasFP(MF);
for (auto &I : MFI.getCalleeSavedInfo()) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
ScratchRegister = Reg;
break;
@@ -825,7 +825,7 @@ bool Thumb1FrameLowering::spillCalleeSavedRegisters(
// LoRegs for saving HiRegs.
for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
LoRegsToSave[Reg] = true;
@@ -949,7 +949,7 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
ARMRegSet CopyRegs;
for (CalleeSavedInfo I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
LoRegsToRestore[Reg] = true;
@@ -1022,7 +1022,7 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
bool NeedsPop = false;
for (CalleeSavedInfo &Info : llvm::reverse(CSI)) {
- unsigned Reg = Info.getReg();
+ Register Reg = Info.getReg();
// High registers (excluding lr) have already been dealt with
if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 4b18f5e20d40..1a36c2ca9152 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI() {}
+ : ARMBaseInstrInfo(STI) {}
/// Return the noop instruction to use for a noop.
MCInst Thumb1InstrInfo::getNop() const {
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index e6d51796ba4d..a83ff5e51004 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -18,7 +18,6 @@
namespace llvm {
class ARMSubtarget;
-class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
ThumbRegisterInfo RI;
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 1164b6ebbac3..1cc5422523f1 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1147,9 +1147,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
// predecessors.
ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
bool Modified = false;
- for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
- I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
- Modified |= ReduceMBB(**I);
+ for (MachineBasicBlock *MBB : RPOT)
+ Modified |= ReduceMBB(*MBB);
return Modified;
}
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 4da6f6ab6994..5d2bc4ebe191 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -37,7 +37,7 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+ThumbRegisterInfo::ThumbRegisterInfo() {}
const TargetRegisterClass *
ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
index 143c339c0664..0b512172ba10 100644
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -28,7 +28,6 @@ FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
FunctionPass *createAVRExpandPseudoPass();
FunctionPass *createAVRFrameAnalyzerPass();
FunctionPass *createAVRRelaxMemPass();
-FunctionPass *createAVRDynAllocaSRPass();
FunctionPass *createAVRBranchSelectionPass();
void initializeAVRShiftExpandPass(PassRegistry &);
@@ -39,17 +38,56 @@ void initializeAVRRelaxMemPass(PassRegistry &);
namespace AVR {
/// An integer that identifies all of the supported AVR address spaces.
-enum AddressSpace { DataMemory, ProgramMemory };
+enum AddressSpace {
+ DataMemory,
+ ProgramMemory,
+ ProgramMemory1,
+ ProgramMemory2,
+ ProgramMemory3,
+ ProgramMemory4,
+ ProgramMemory5,
+ NumAddrSpaces,
+};
/// Checks if a given type is a pointer to program memory.
template <typename T> bool isProgramMemoryAddress(T *V) {
- return cast<PointerType>(V->getType())->getAddressSpace() == ProgramMemory;
+ auto *PT = cast<PointerType>(V->getType());
+ assert(PT != nullptr && "unexpected MemSDNode");
+ return PT->getAddressSpace() == ProgramMemory ||
+ PT->getAddressSpace() == ProgramMemory1 ||
+ PT->getAddressSpace() == ProgramMemory2 ||
+ PT->getAddressSpace() == ProgramMemory3 ||
+ PT->getAddressSpace() == ProgramMemory4 ||
+ PT->getAddressSpace() == ProgramMemory5;
+}
+
+template <typename T> AddressSpace getAddressSpace(T *V) {
+ auto *PT = cast<PointerType>(V->getType());
+ assert(PT != nullptr && "unexpected MemSDNode");
+ unsigned AS = PT->getAddressSpace();
+ if (AS < NumAddrSpaces)
+ return static_cast<AddressSpace>(AS);
+ return NumAddrSpaces;
}
inline bool isProgramMemoryAccess(MemSDNode const *N) {
- auto V = N->getMemOperand()->getValue();
+ auto *V = N->getMemOperand()->getValue();
+ if (V != nullptr && isProgramMemoryAddress(V))
+ return true;
+ return false;
+}
- return (V != nullptr) ? isProgramMemoryAddress(V) : false;
+// Get the index of the program memory bank.
+// -1: not program memory
+// 0: ordinary program memory
+// 1~5: extended program memory
+inline int getProgramMemoryBank(MemSDNode const *N) {
+ auto *V = N->getMemOperand()->getValue();
+ if (V == nullptr || !isProgramMemoryAddress(V))
+ return -1;
+ AddressSpace AS = getAddressSpace(V);
+ assert(ProgramMemory <= AS && AS <= ProgramMemory5);
+ return static_cast<int>(AS - ProgramMemory);
}
} // end of namespace AVR
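Aside (not part of the diff): a minimal host-side sketch of the address-space-to-bank mapping that getProgramMemoryBank implements, assuming the enum introduced above (DataMemory = 0, ProgramMemory = 1, ProgramMemory1..ProgramMemory5 = 2..6). The helper name programMemoryBank is made up for illustration.

    // Illustrative sketch of the bank mapping, not the LLVM helper itself.
    #include <cassert>
    int programMemoryBank(unsigned AddrSpace) {
      // AS 0 is data memory; AS 1 is ordinary progmem; AS 2..6 are banks 1..5.
      if (AddrSpace >= 1 && AddrSpace <= 6)
        return static_cast<int>(AddrSpace) - 1;
      return -1; // not program memory
    }
    int main() {
      assert(programMemoryBank(0) == -1); // DataMemory
      assert(programMemoryBank(1) == 0);  // ProgramMemory
      assert(programMemoryBank(6) == 5);  // ProgramMemory5
      return 0;
    }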
diff --git a/llvm/lib/Target/AVR/AVRCallingConv.td b/llvm/lib/Target/AVR/AVRCallingConv.td
index 87874c5c50b2..b4bc35e191c0 100644
--- a/llvm/lib/Target/AVR/AVRCallingConv.td
+++ b/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -36,4 +36,4 @@ def ArgCC_AVR_Vararg : CallingConv<[
//===----------------------------------------------------------------------===//
def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
-def CSR_Interrupts : CalleeSavedRegs<(add(sequence "R%u", 31, 0))>;
+def CSR_Interrupts : CalleeSavedRegs<(add(sequence "R%u", 31, 2))>;
diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index cb85d73772c5..144ae2b320f9 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -92,6 +92,7 @@ private:
/// Specific shift implementation.
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
bool expandLSRB7Rd(Block &MBB, BlockIt MBBI);
+ bool expandASRB6Rd(Block &MBB, BlockIt MBBI);
bool expandASRB7Rd(Block &MBB, BlockIt MBBI);
bool expandLSLW4Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW4Rd(Block &MBB, BlockIt MBBI);
@@ -101,6 +102,9 @@ private:
bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
+ // Common implementation of LPMWRdZ and ELPMWRdZ.
+ bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
+
/// Scavenges a free GPR8 register for use.
Register scavengeGPR8(MachineInstr &MI);
};
@@ -808,18 +812,25 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
return true;
}
-template <>
-bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
Register TmpReg = 0; // 0 for no temporary register
Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- unsigned OpLo = AVR::LPMRdZPi;
- unsigned OpHi = AVR::LPMRdZ;
+ unsigned OpLo = IsExt ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
+ unsigned OpHi = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+ // Set the I/O register RAMPZ for ELPM.
+ if (IsExt) {
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+ Register Bank = MI.getOperand(2).getReg();
+ // out RAMPZ, rtmp
+ buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(Bank);
+ }
+
// Use a temporary register if src and dst registers are the same.
if (DstReg == SrcReg)
TmpReg = scavengeGPR8(MI);
@@ -857,8 +868,51 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
}
template <>
+bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+ return expandLPMWELPMW(MBB, MBBI, false);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZ>(Block &MBB, BlockIt MBBI) {
+ return expandLPMWELPMW(MBB, MBBI, true);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register BankReg = MI.getOperand(2).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+
+ // Set the I/O register RAMPZ for ELPM (out RAMPZ, rtmp).
+ buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
+
+ // Load byte.
+ auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ)
+ .addReg(DstReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+
+ MILB.setMemRefs(MI.memoperands());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
- llvm_unreachable("wide LPMPi is unimplemented");
+ llvm_unreachable("16-bit LPMPi is unimplemented");
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZPi>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("byte ELPMPi is unimplemented");
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMWRdZPi>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("16-bit ELPMPi is unimplemented");
}
template <typename Func>
@@ -1411,6 +1465,30 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
return true;
}
+template <>
+bool AVRExpandPseudo::expand<AVR::LSLWHiRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // add hireg, hireg <==> lsl hireg
+ auto MILSL =
+ buildMI(MBB, MBBI, AVR::ADDRdRr)
+ .addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MILSL->getOperand(3).setIsDead();
+
+ MI.eraseFromParent();
+ return true;
+}
+
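Aside (not part of the diff): a tiny check of the identity the "add hireg, hireg <==> lsl hireg" comment relies on. AVR has no separate LSL-by-one encoding; lsl Rd is add Rd, Rd, and for 8-bit wraparound x + x equals x << 1. Illustrative only.

    // x + x == x << 1 for every 8-bit value (wraparound semantics).
    #include <cassert>
    #include <cstdint>
    int main() {
      for (int v = 0; v < 256; ++v)
        assert(static_cast<uint8_t>(v + v) == static_cast<uint8_t>(v << 1));
      return 0;
    }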
bool AVRExpandPseudo::expandLSLW4Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
@@ -1586,6 +1664,29 @@ bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
return true;
}
+template <>
+bool AVRExpandPseudo::expand<AVR::LSRWLoRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // lsr loreg
+ auto MILSR =
+ buildMI(MBB, MBBI, AVR::LSRRd)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MILSR->getOperand(2).setIsDead();
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AVRExpandPseudo::expandLSRW4Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
@@ -1773,6 +1874,29 @@ bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
return true;
}
+template <>
+bool AVRExpandPseudo::expand<AVR::ASRWLoRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstLoReg, DstHiReg;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // asr loreg
+ auto MIASR =
+ buildMI(MBB, MBBI, AVR::ASRRd)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIASR->getOperand(2).setIsDead();
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
@@ -1921,6 +2045,44 @@ bool AVRExpandPseudo::expand<AVR::LSRBNRd>(Block &MBB, BlockIt MBBI) {
}
}
+bool AVRExpandPseudo::expandASRB6Rd(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+
+ // bst r24, 6
+ // lsl r24
+ // sbc r24, r24
+ // bld r24, 0
+
+ buildMI(MBB, MBBI, AVR::BST)
+ .addReg(DstReg)
+ .addImm(6)
+ ->getOperand(2)
+ .setIsUndef(true);
+
+ buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rd
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
+
+ buildMI(MBB, MBBI, AVR::BLD)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(0)
+ ->getOperand(3)
+ .setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
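As a sanity check on the bst/lsl/sbc/bld sequence emitted above, the following throwaway host-side sketch (illustrative only; two's-complement behaviour assumed, helper name made up) models it bit by bit and compares it with an arithmetic shift right by 6 over every byte value:

    #include <cassert>
    #include <cstdint>

    // Models: bst r,6 ; lsl r ; sbc r,r ; bld r,0
    static int8_t asrb6Expansion(uint8_t v) {
      uint8_t t = (v >> 6) & 1;      // bst: T <- bit 6
      uint8_t c = (v >> 7) & 1;      // lsl: C <- old bit 7 (the sign bit)
      uint8_t r = c ? 0xFF : 0x00;   // sbc r,r: 0x00 or 0xFF depending on C
      r = uint8_t((r & 0xFE) | t);   // bld: bit 0 <- T
      return int8_t(r);
    }

    int main() {
      for (int v = 0; v < 256; ++v)
        assert(asrb6Expansion(uint8_t(v)) == (int8_t(uint8_t(v)) >> 6));
      return 0;
    }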
bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
@@ -1957,6 +2119,8 @@ bool AVRExpandPseudo::expand<AVR::ASRBNRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned Imm = MI.getOperand(2).getImm();
switch (Imm) {
+ case 6:
+ return expandASRB6Rd(MBB, MBBI);
case 7:
return expandASRB7Rd(MBB, MBBI);
default:
@@ -2158,6 +2322,10 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::LDDWRdPtrQ);
EXPAND(AVR::LPMWRdZ);
EXPAND(AVR::LPMWRdZPi);
+ EXPAND(AVR::ELPMBRdZ);
+ EXPAND(AVR::ELPMWRdZ);
+ EXPAND(AVR::ELPMBRdZPi);
+ EXPAND(AVR::ELPMWRdZPi);
EXPAND(AVR::AtomicLoad8);
EXPAND(AVR::AtomicLoad16);
EXPAND(AVR::AtomicStore8);
@@ -2189,6 +2357,9 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::RORWRd);
EXPAND(AVR::ROLWRd);
EXPAND(AVR::ASRWRd);
+ EXPAND(AVR::LSLWHiRd);
+ EXPAND(AVR::LSRWLoRd);
+ EXPAND(AVR::ASRWLoRd);
EXPAND(AVR::LSLWNRd);
EXPAND(AVR::LSRWNRd);
EXPAND(AVR::ASRWNRd);
diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index 543d94875037..b3bc9ede205e 100644
--- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -79,11 +79,6 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AVR::R0, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr))
- .addReg(AVR::R0, RegState::Define)
- .addReg(AVR::R0, RegState::Kill)
- .addReg(AVR::R0, RegState::Kill)
- .setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr))
.addReg(AVR::R1, RegState::Define)
.addReg(AVR::R1, RegState::Kill)
.addReg(AVR::R1, RegState::Kill)
@@ -176,7 +171,7 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
const AVRInstrInfo &TII = *STI.getInstrInfo();
// Early exit if there is no need to restore the frame pointer.
- if (!FrameSize) {
+ if (!FrameSize && !MF.getFrameInfo().hasVarSizedObjects()) {
restoreStatusRegister(MF, MBB);
return;
}
@@ -193,22 +188,24 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
--MBBI;
}
- unsigned Opcode;
+ if (FrameSize) {
+ unsigned Opcode;
- // Select the optimal opcode depending on how big it is.
- if (isUInt<6>(FrameSize)) {
- Opcode = AVR::ADIWRdK;
- } else {
- Opcode = AVR::SUBIWRdK;
- FrameSize = -FrameSize;
- }
+ // Select the optimal opcode depending on how big it is.
+ if (isUInt<6>(FrameSize)) {
+ Opcode = AVR::ADIWRdK;
+ } else {
+ Opcode = AVR::SUBIWRdK;
+ FrameSize = -FrameSize;
+ }
- // Restore the frame pointer by doing FP += <size>.
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28)
- .addReg(AVR::R29R28, RegState::Kill)
- .addImm(FrameSize);
- // The SREG implicit def is dead.
- MI->getOperand(3).setIsDead();
+ // Restore the frame pointer by doing FP += <size>.
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28)
+ .addReg(AVR::R29R28, RegState::Kill)
+ .addImm(FrameSize);
+ // The SREG implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
// Write back R29R28 to SP and temporarily disable interrupts.
BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP)
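The opcode choice above hinges on ADIW accepting only a 6-bit immediate; larger frames fall back to SUBIW with the negated size, which performs the same FP += FrameSize addition modulo 2^16. A throwaway host-side check of that identity (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t fp = 0; fp <= 0xFFFF; fp += 251) {
        for (uint32_t size = 64; size <= 1024; ++size) {
          uint16_t viaAdiw = uint16_t(fp + size);      // FP += FrameSize
          uint16_t negated = uint16_t(0u - size);      // FrameSize = -FrameSize
          uint16_t viaSubiw = uint16_t(fp - negated);  // FP -= -FrameSize
          assert(viaAdiw == viaSubiw);
        }
      }
      return 0;
    }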
@@ -230,7 +227,8 @@ bool AVRFrameLowering::hasFP(const MachineFunction &MF) const {
const AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>();
return (FuncInfo->getHasSpills() || FuncInfo->getHasAllocas() ||
- FuncInfo->getHasStackArgs());
+ FuncInfo->getHasStackArgs() ||
+ MF.getFrameInfo().hasVarSizedObjects());
}
bool AVRFrameLowering::spillCalleeSavedRegisters(
@@ -248,7 +246,7 @@ bool AVRFrameLowering::spillCalleeSavedRegisters(
AVRMachineFunctionInfo *AVRFI = MF.getInfo<AVRMachineFunctionInfo>();
for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
bool IsNotLiveIn = !MBB.isLiveIn(Reg);
assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 &&
@@ -286,7 +284,7 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters(
const TargetInstrInfo &TII = *STI.getInstrInfo();
for (const CalleeSavedInfo &CCSI : CSI) {
- unsigned Reg = CCSI.getReg();
+ Register Reg = CCSI.getReg();
assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 &&
"Invalid register size");
@@ -480,56 +478,4 @@ char AVRFrameAnalyzer::ID = 0;
/// Creates instance of the frame analyzer pass.
FunctionPass *createAVRFrameAnalyzerPass() { return new AVRFrameAnalyzer(); }
-/// Create the Dynalloca Stack Pointer Save/Restore pass.
-/// Insert a copy of SP before allocating the dynamic stack memory and restore
-/// it in function exit to restore the original SP state. This avoids the need
-/// of reserving a register pair for a frame pointer.
-struct AVRDynAllocaSR : public MachineFunctionPass {
- static char ID;
- AVRDynAllocaSR() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- // Early exit when there are no variable sized objects in the function.
- if (!MF.getFrameInfo().hasVarSizedObjects()) {
- return false;
- }
-
- const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- MachineBasicBlock &EntryMBB = MF.front();
- MachineBasicBlock::iterator MBBI = EntryMBB.begin();
- DebugLoc DL = EntryMBB.findDebugLoc(MBBI);
-
- Register SPCopy =
- MF.getRegInfo().createVirtualRegister(&AVR::DREGSRegClass);
-
- // Create a copy of SP in function entry before any dynallocas are
- // inserted.
- BuildMI(EntryMBB, MBBI, DL, TII.get(AVR::COPY), SPCopy).addReg(AVR::SP);
-
- // Restore SP in all exit basic blocks.
- for (MachineBasicBlock &MBB : MF) {
- // If last instruction is a return instruction, add a restore copy.
- if (!MBB.empty() && MBB.back().isReturn()) {
- MBBI = MBB.getLastNonDebugInstr();
- DL = MBBI->getDebugLoc();
- BuildMI(MBB, MBBI, DL, TII.get(AVR::COPY), AVR::SP)
- .addReg(SPCopy, RegState::Kill);
- }
- }
-
- return true;
- }
-
- StringRef getPassName() const override {
- return "AVR dynalloca stack pointer save/restore";
- }
-};
-
-char AVRDynAllocaSR::ID = 0;
-
-/// createAVRDynAllocaSRPass - returns an instance of the dynalloca stack
-/// pointer save/restore pass.
-FunctionPass *createAVRDynAllocaSRPass() { return new AVRDynAllocaSR(); }
-
} // end of namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 7ec2629ab45d..df364cae671c 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -38,7 +38,7 @@ public:
bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Disp);
bool selectIndexedLoad(SDNode *N);
- unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT);
+ unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT, int Bank);
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
std::vector<SDValue> &OutOps) override;
@@ -165,35 +165,31 @@ bool AVRDAGToDAGISel::selectIndexedLoad(SDNode *N) {
return true;
}
-unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
- MVT VT) {
- ISD::MemIndexedMode AM = LD->getAddressingMode();
-
+unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT,
+ int Bank) {
// Progmem indexed loads only work in POSTINC mode.
- if (LD->getExtensionType() != ISD::NON_EXTLOAD || AM != ISD::POST_INC) {
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
+ LD->getAddressingMode() != ISD::POST_INC)
return 0;
- }
+
+ // Feature ELPM is needed for loading from extended program memory.
+ assert((Bank == 0 || Subtarget->hasELPM()) &&
+ "cannot load from extended program memory on this mcu");
unsigned Opcode = 0;
int Offs = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
switch (VT.SimpleTy) {
- case MVT::i8: {
- if (Offs != 1) {
- return 0;
- }
- Opcode = AVR::LPMRdZPi;
+ case MVT::i8:
+ if (Offs == 1)
+ Opcode = Bank > 0 ? AVR::ELPMBRdZPi : AVR::LPMRdZPi;
break;
- }
- case MVT::i16: {
- if (Offs != 2) {
- return 0;
- }
- Opcode = AVR::LPMWRdZPi;
+ case MVT::i16:
+ if (Offs == 2)
+ Opcode = Bank > 0 ? AVR::ELPMWRdZPi : AVR::LPMWRdZPi;
break;
- }
default:
- return 0;
+ break;
}
return Opcode;
@@ -360,7 +356,12 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
return selectIndexedLoad(N);
}
- assert(Subtarget->hasLPM() && "cannot load from program memory on this mcu");
+ if (!Subtarget->hasLPM())
+ report_fatal_error("cannot load from program memory on this mcu");
+
+ int ProgMemBank = AVR::getProgramMemoryBank(LD);
+ if (ProgMemBank < 0 || ProgMemBank > 5)
+ report_fatal_error("unexpected program memory bank");
// This is a flash memory load, move the pointer into R31R30 and emit
// the lpm instruction.
@@ -374,25 +375,48 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
Ptr = CurDAG->getCopyFromReg(Chain, DL, AVR::R31R30, MVT::i16,
Chain.getValue(1));
- SDValue RegZ = CurDAG->getRegister(AVR::R31R30, MVT::i16);
-
// Check if the opcode can be converted into an indexed load.
- if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT)) {
+ if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT, ProgMemBank)) {
// It is legal to fold the load into an indexed load.
- ResNode =
- CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr, RegZ);
- ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ if (ProgMemBank == 0) {
+ ResNode =
+ CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr);
+ } else {
+ // Do not combine the LDI instruction into the ELPM pseudo instruction,
+ // since it may be reused by other ELPM pseudo instructions.
+ SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+ auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+ ResNode = CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other,
+ Ptr, SDValue(NP, 0));
+ }
} else {
// Selecting an indexed load is not legal, fallback to a normal load.
switch (VT.SimpleTy) {
case MVT::i8:
- ResNode = CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other,
- Ptr, RegZ);
+ if (ProgMemBank == 0) {
+ ResNode =
+ CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+ } else {
+ // Do not combine the LDI instruction into the ELPM pseudo instruction,
+ // since it may be reused by other ELPM pseudo instructions.
+ SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+ auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+ ResNode = CurDAG->getMachineNode(AVR::ELPMBRdZ, DL, MVT::i8, MVT::Other,
+ Ptr, SDValue(NP, 0));
+ }
break;
case MVT::i16:
- ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other,
- Ptr, RegZ);
- ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ if (ProgMemBank == 0) {
+ ResNode =
+ CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other, Ptr);
+ } else {
+ // Do not combine the LDI instruction into the ELPM pseudo instruction,
+ // since LDI requires the destination register to be in the range R16~R31.
+ SDValue NC = CurDAG->getTargetConstant(ProgMemBank, DL, MVT::i8);
+ auto *NP = CurDAG->getMachineNode(AVR::LDIRdK, DL, MVT::i8, NC);
+ ResNode = CurDAG->getMachineNode(AVR::ELPMWRdZ, DL, MVT::i16,
+ MVT::Other, Ptr, SDValue(NP, 0));
+ }
break;
default:
llvm_unreachable("Unsupported VT!");
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index a6f2afb87102..a58fedf6cd36 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -359,6 +359,11 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim,
DAG.getConstant(7, dl, VT));
ShiftAmount = 0;
+ } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) {
+ // Optimize ASR when ShiftAmount == 6.
+ Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
+ DAG.getConstant(6, dl, VT));
+ ShiftAmount = 0;
} else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {
// Optimize ASR when ShiftAmount == 7.
Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
@@ -387,16 +392,22 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
DAG.getConstant(8, dl, VT));
ShiftAmount -= 8;
+ // Only operate on the higher byte for remaining shift bits.
+ Opc8 = AVRISD::LSLHI;
break;
case ISD::SRL:
Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
DAG.getConstant(8, dl, VT));
ShiftAmount -= 8;
+ // Only operate on the lower byte for remaining shift bits.
+ Opc8 = AVRISD::LSRLO;
break;
case ISD::SRA:
Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
DAG.getConstant(8, dl, VT));
ShiftAmount -= 8;
+ // Only operate on the lower byte for remaining shift bits.
+ Opc8 = AVRISD::ASRLO;
break;
default:
break;
@@ -407,11 +418,22 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
DAG.getConstant(12, dl, VT));
ShiftAmount -= 12;
+ // Only operate on the higher byte for remaining shift bits.
+ Opc8 = AVRISD::LSLHI;
break;
case ISD::SRL:
Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
DAG.getConstant(12, dl, VT));
ShiftAmount -= 12;
+ // Only operate on the lower byte for remaining shift bits.
+ Opc8 = AVRISD::LSRLO;
+ break;
+ case ISD::SRA:
+ Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
+ DAG.getConstant(8, dl, VT));
+ ShiftAmount -= 8;
+ // Only operate on the lower byte for remaining shift bits.
+ Opc8 = AVRISD::ASRLO;
break;
default:
break;
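The new LSLHI/LSRLO/ASRLO nodes exist because, once a 16-bit value has been shifted by a whole byte, the remaining single-bit shifts only ever affect one half of the word. A host-side sketch of the left-shift case (illustrative only, helper name made up; the SRL/SRA cases mirror it on the low byte):

    #include <cassert>
    #include <cstdint>

    // Shift the whole word by 8 first (LSLWN), then apply the remaining n - 8
    // single-bit shifts to the high byte only (LSLHI); the low byte is already 0.
    static uint16_t lsl16ViaHighByte(uint16_t v, unsigned n) {
      assert(n >= 8 && n < 16);
      uint8_t hi = uint8_t(v);        // LSLWN by 8: old low byte becomes the high byte
      for (unsigned i = 8; i < n; ++i)
        hi = uint8_t(hi << 1);        // LSLHI, one bit at a time
      return uint16_t(uint16_t(hi) << 8);
    }

    int main() {
      for (unsigned n = 8; n < 16; ++n)
        for (uint32_t v = 0; v <= 0xFFFF; ++v)
          assert(lsl16ViaHighByte(uint16_t(v), n) == uint16_t(uint16_t(v) << n));
      return 0;
    }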
@@ -874,7 +896,8 @@ bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// Allow reg+<6bit> offset.
if (Offs < 0)
Offs = -Offs;
- if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isUInt<6>(Offs)) {
+ if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 &&
+ isUInt<6>(Offs)) {
return true;
}
@@ -1169,7 +1192,7 @@ SDValue AVRTargetLowering::LowerFormalArguments(
llvm_unreachable("Unknown argument type!");
}
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// :NOTE: Clang should not promote any i8 into i16 but for safety the
@@ -1672,6 +1695,18 @@ MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
return BB;
}
+// Insert a read from R1, which almost always contains the value 0.
+MachineBasicBlock *
+AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ MachineBasicBlock::iterator I(MI);
+ BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
+ .add(MI.getOperand(0))
+ .addReg(AVR::R1);
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *
AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -1694,6 +1729,8 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case AVR::MULRdRr:
case AVR::MULSRdRr:
return insertMul(MI, MBB);
+ case AVR::CopyR1:
+ return insertCopyR1(MI, MBB);
}
assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
@@ -2012,7 +2049,7 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result;
SDLoc DL(Op);
EVT Ty = Op.getValueType();
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
index 3ae036b66bcb..116417b61566 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -38,12 +38,15 @@ enum NodeType {
LSL, ///< Logical shift left.
LSLBN, ///< Byte logical shift left N bits.
LSLWN, ///< Word logical shift left N bits.
+ LSLHI, ///< Higher 8-bit of word logical shift left.
LSR, ///< Logical shift right.
LSRBN, ///< Byte logical shift right N bits.
LSRWN, ///< Word logical shift right N bits.
+ LSRLO, ///< Lower 8-bit of word logical shift right.
ASR, ///< Arithmetic shift right.
ASRBN, ///< Byte arithmetic shift right N bits.
ASRWN, ///< Word arithmetic shift right N bits.
+ ASRLO, ///< Lower 8-bit of word arithmetic shift right.
ROR, ///< Bit rotate right.
ROL, ///< Bit rotate left.
LSLLOOP, ///< A loop of single logical shift left instructions.
@@ -184,6 +187,8 @@ protected:
private:
MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *insertCopyR1(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index 51060018a5ca..ac52c47f93d5 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -304,11 +304,11 @@ bool AVRInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
+ TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
UnCondBrIter = MBB.end();
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index c7f423292da0..2b96dc0b833a 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -60,6 +60,9 @@ def AVRlsr : SDNode<"AVRISD::LSR", SDTIntUnaryOp>;
def AVRrol : SDNode<"AVRISD::ROL", SDTIntUnaryOp>;
def AVRror : SDNode<"AVRISD::ROR", SDTIntUnaryOp>;
def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>;
+def AVRlslhi : SDNode<"AVRISD::LSLHI", SDTIntUnaryOp>;
+def AVRlsrlo : SDNode<"AVRISD::LSRLO", SDTIntUnaryOp>;
+def AVRasrlo : SDNode<"AVRISD::ASRLO", SDTIntUnaryOp>;
def AVRlslbn : SDNode<"AVRISD::LSLBN", SDTIntBinOp>;
def AVRlsrbn : SDNode<"AVRISD::LSRBN", SDTIntBinOp>;
def AVRasrbn : SDNode<"AVRISD::ASRBN", SDTIntBinOp>;
@@ -1391,7 +1394,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
// ldd Rd, P+q
// ldd Rd+1, P+q+1
let Constraints = "@earlyclobber $dst" in def LDDWRdPtrQ
- : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND
+ : Pseudo<(outs DREGS
: $dst),
(ins memri
: $memri),
@@ -1699,21 +1702,34 @@ let mayLoad = 1, hasSideEffects = 0 in {
: F16<0b1001010111011000, (outs), (ins), "elpm", []>,
Requires<[HasELPM]>;
- def ELPMRdZ : FLPMX<1, 0,
- (outs GPR8
- : $dst),
- (ins ZREG
- : $z),
+ def ELPMRdZ : FLPMX<1, 0, (outs GPR8:$dst), (ins ZREG:$z),
"elpm\t$dst, $z", []>,
Requires<[HasELPMX]>;
- let Defs = [R31R30] in def ELPMRdZPi : FLPMX<1, 1,
- (outs GPR8
- : $dst),
- (ins ZREG
- : $z),
- "elpm\t$dst, $z+", []>,
- Requires<[HasELPMX]>;
+ let Defs = [R31R30] in {
+ def ELPMRdZPi : FLPMX<1, 1, (outs GPR8:$dst), (ins ZREG:$z),
+ "elpm\t$dst, $z+", []>,
+ Requires<[HasELPMX]>;
+ }
+
+ // These pseudos are combination of the OUT and ELPM instructions.
+ let Defs = [R31R30], hasSideEffects = 1 in {
+ def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmb\t$dst, $z, $p", []>,
+ Requires<[HasELPMX]>;
+
+ def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmw\t$dst, $z, $p", []>,
+ Requires<[HasELPMX]>;
+
+ def ELPMBRdZPi : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmb\t$dst, $z+, $p", []>,
+ Requires<[HasELPMX]>;
+
+ def ELPMWRdZPi : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
+ "elpmw\t$dst, $z+, $p", []>,
+ Requires<[HasELPMX]>;
+ }
}
// Store program memory operations.
@@ -1848,6 +1864,9 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
: $src)),
(implicit SREG)]>;
+ def LSLWHiRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "lslwhi\t$rd",
+ [(set i16:$rd, (AVRlslhi i16:$src)), (implicit SREG)]>;
+
def LSLWNRd : Pseudo<(outs DLDREGS
: $rd),
(ins DREGS
@@ -1895,6 +1914,9 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
: $src)),
(implicit SREG)]>;
+ def LSRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "lsrwlo\t$rd",
+ [(set i16:$rd, (AVRlsrlo i16:$src)), (implicit SREG)]>;
+
def LSRWNRd : Pseudo<(outs DLDREGS
: $rd),
(ins DREGS
@@ -1968,6 +1990,9 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
: $src)),
(implicit SREG)]>;
+ def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd",
+ [(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>;
+
def ROLBRd : Pseudo<(outs GPR8
: $rd),
(ins GPR8
@@ -2365,6 +2390,10 @@ def Asr16 : ShiftPseudo<(outs DREGS
: $src, i8
: $cnt))]>;
+// Lowered to a copy from R1, which contains the value zero.
+let usesCustomInserter = 1 in
+def CopyR1 : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 1886debaf492..5dd7f5c55695 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -44,10 +44,7 @@ AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t *
AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
-
- return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_RegMask
- : CSR_Normal_RegMask;
+ return CSR_Normal_RegMask;
}
BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h
index fa27d9283209..2c5647b52c1c 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -27,7 +27,7 @@ public:
public:
const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -39,7 +39,7 @@ public:
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td
index bb4e86ca0536..c5fda788fe4d 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -178,26 +178,6 @@ def DREGSMOVW : RegisterClass<"AVR", [i16], 8,
R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
R7R6, R5R4, R3R2, R1R0)>;
-// The 16-bit DREGS register class, excluding the Z pointer register.
-//
-// This is used by instructions which cause high pointer register
-// contention which leads to an assertion in the register allocator.
-//
-// There is no technical reason why instructions that use this class
-// cannot use Z; it's simply a workaround a regalloc bug.
-//
-// More information can be found in PR39553.
-def DREGS_WITHOUT_YZ_WORKAROUND
- : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R27R26,
- // Callee saved registers.
- R17R16, R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2,
- R1R0)>;
-
// 16-bit register class for immediate instructions.
def DLDREGS : RegisterClass<"AVR", [i16], 8,
(
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp
index 990e1c57e63f..8a5481423e9f 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.cpp
+++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp
@@ -40,8 +40,7 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
m_hasTinyEncoding(false), m_hasMemMappedGPR(false),
m_FeatureSetDummy(false),
- InstrInfo(), FrameLowering(),
- TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo() {
+ TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)) {
// Parse features string.
ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
}
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
index 90b9cd4da7c1..f8ca191b1868 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -91,6 +91,9 @@ public:
return ELFArch;
}
+ /// Get the address of the I/O register RAMPZ.
+ int getIORegRAMPZ() const { return 0x3b; }
+
private:
/// The ELF e_flags architecture.
unsigned ELFArch;
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 65740f7c2306..22b9ba3ece07 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -70,7 +70,6 @@ public:
bool addInstSelector() override;
void addPreSched2() override;
void addPreEmitPass() override;
- void addPreRegAlloc() override;
};
} // namespace
@@ -118,11 +117,6 @@ bool AVRPassConfig::addInstSelector() {
return false;
}
-void AVRPassConfig::addPreRegAlloc() {
- // Create the dynalloc SP save/restore pass to handle variable sized allocas.
- addPass(createAVRDynAllocaSRPass());
-}
-
void AVRPassConfig::addPreSched2() {
addPass(createAVRRelaxMemPass());
addPass(createAVRExpandPseudoPass());
diff --git a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
index c7715ca1f51b..fe8e863be1a3 100644
--- a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "AVRTargetObjectFile.h"
+#include "AVRTargetMachine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/DerivedTypes.h"
@@ -22,14 +23,60 @@ void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
Base::Initialize(Ctx, TM);
ProgmemDataSection =
Ctx.getELFSection(".progmem.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Progmem1DataSection =
+ Ctx.getELFSection(".progmem1.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Progmem2DataSection =
+ Ctx.getELFSection(".progmem2.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Progmem3DataSection =
+ Ctx.getELFSection(".progmem3.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Progmem4DataSection =
+ Ctx.getELFSection(".progmem4.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Progmem5DataSection =
+ Ctx.getELFSection(".progmem5.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
}
MCSection *AVRTargetObjectFile::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- // Global values in flash memory are placed in the progmem.data section
+ // Global values in flash memory are placed in the progmem*.data section
// unless they already have a user assigned section.
- if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection() && Kind.isReadOnly())
- return ProgmemDataSection;
+ const auto &AVRTM = static_cast<const AVRTargetMachine &>(TM);
+ if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection() &&
+ Kind.isReadOnly()) {
+ // The AVR subtarget should support LPM to access section '.progmem*.data'.
+ if (!AVRTM.getSubtargetImpl()->hasLPM()) {
+ // TODO: Get the global object's location in source file.
+ getContext().reportError(
+ SMLoc(),
+ "Current AVR subtarget does not support accessing program memory");
+ return Base::SelectSectionForGlobal(GO, Kind, TM);
+ }
+ // The AVR subtarget should support ELPM to access section
+ // '.progmem[1|2|3|4|5].data'.
+ if (!AVRTM.getSubtargetImpl()->hasELPM() &&
+ AVR::getAddressSpace(GO) != AVR::ProgramMemory) {
+ // TODO: Get the global object's location in source file.
+ getContext().reportError(SMLoc(),
+ "Current AVR subtarget does not support "
+ "accessing extended program memory");
+ return ProgmemDataSection;
+ }
+ switch (AVR::getAddressSpace(GO)) {
+ case AVR::ProgramMemory: // address space 1
+ return ProgmemDataSection;
+ case AVR::ProgramMemory1: // address space 2
+ return Progmem1DataSection;
+ case AVR::ProgramMemory2: // address space 3
+ return Progmem2DataSection;
+ case AVR::ProgramMemory3: // address space 4
+ return Progmem3DataSection;
+ case AVR::ProgramMemory4: // address space 5
+ return Progmem4DataSection;
+ case AVR::ProgramMemory5: // address space 6
+ return Progmem5DataSection;
+ default:
+ llvm_unreachable("unexpected program memory index");
+ }
+ }
// Otherwise, we work the same way as ELF.
return Base::SelectSectionForGlobal(GO, Kind, TM);
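To summarize the switch above: address space N (1..6) maps to section '.progmem<N-1>.data', with the historical spelling '.progmem.data' kept for N == 1. A compact restatement in host C++ (illustrative only; the helper is not part of the backend):

    #include <cstdio>
    #include <string>

    static std::string progmemSectionFor(unsigned AddrSpace) {
      // AddrSpace is assumed to be 1..6, i.e. AVR::ProgramMemory..ProgramMemory5.
      if (AddrSpace == 1)
        return ".progmem.data";
      return ".progmem" + std::to_string(AddrSpace - 1) + ".data";
    }

    int main() {
      for (unsigned AS = 1; AS <= 6; ++AS)
        std::printf("address space %u -> %s\n", AS, progmemSectionFor(AS).c_str());
      return 0;
    }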
diff --git a/llvm/lib/Target/AVR/AVRTargetObjectFile.h b/llvm/lib/Target/AVR/AVRTargetObjectFile.h
index 53d8510d9a21..609849b44029 100644
--- a/llvm/lib/Target/AVR/AVRTargetObjectFile.h
+++ b/llvm/lib/Target/AVR/AVRTargetObjectFile.h
@@ -25,6 +25,11 @@ public:
private:
MCSection *ProgmemDataSection;
+ MCSection *Progmem1DataSection;
+ MCSection *Progmem2DataSection;
+ MCSection *Progmem3DataSection;
+ MCSection *Progmem4DataSection;
+ MCSection *Progmem5DataSection;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 95ecd28200ba..f19e7840eb31 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -107,13 +107,13 @@ class AVROperand : public MCParsedAsmOperand {
public:
AVROperand(StringRef Tok, SMLoc const &S)
- : Base(), Kind(k_Token), Tok(Tok), Start(S), End(S) {}
+ : Kind(k_Token), Tok(Tok), Start(S), End(S) {}
AVROperand(unsigned Reg, SMLoc const &S, SMLoc const &E)
- : Base(), Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {}
+ : Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {}
AVROperand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
- : Base(), Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {}
+ : Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {}
AVROperand(unsigned Reg, MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
- : Base(), Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {}
+ : Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {}
struct RegisterImmediate {
unsigned Reg;
@@ -281,7 +281,7 @@ bool AVRAsmParser::invalidOperand(SMLoc const &Loc,
OperandVector const &Operands,
uint64_t const &ErrorInfo) {
SMLoc ErrorLoc = Loc;
- char const *Diag = 0;
+ char const *Diag = nullptr;
if (ErrorInfo != ~0U) {
if (ErrorInfo >= Operands.size()) {
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index a3a4d63932c0..3624ade854c0 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -47,7 +47,7 @@ static void signed_width(unsigned Width, uint64_t Value,
" to " + std::to_string(Max) + ")";
if (Ctx) {
- Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
+ Ctx->reportError(Fixup.getLoc(), Diagnostic);
} else {
llvm_unreachable(Diagnostic.c_str());
}
@@ -66,7 +66,7 @@ static void unsigned_width(unsigned Width, uint64_t Value,
" (expected an integer in the range 0 to " + std::to_string(Max) + ")";
if (Ctx) {
- Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
+ Ctx->reportError(Fixup.getLoc(), Diagnostic);
} else {
llvm_unreachable(Diagnostic.c_str());
}
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 50298bf5e943..697deb117bcb 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -101,7 +101,7 @@ struct BPFOperand : public MCParsedAsmOperand {
ImmOp Imm;
};
- BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ BPFOperand(KindTy K) : Kind(K) {}
public:
BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index ab7e848409d9..46141e69d9d4 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -1002,7 +1002,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(CallInst *Call,
VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr or enum value
GV = new GlobalVariable(*M, VarType, false, GlobalVariable::ExternalLinkage,
- NULL, AccessKey);
+ nullptr, AccessKey);
GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
GEPGlobals[AccessKey] = GV;
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 90723ac04f64..0587cb0e16e3 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -325,7 +325,7 @@ SDValue BPFTargetLowering::LowerFormalArguments(
default: {
errs() << "LowerFormalArguments Unhandled argument type: "
<< RegVT.getEVTString() << '\n';
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
case MVT::i32:
case MVT::i64:
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
index eb8c48ac49de..2bc2302cf55c 100644
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -41,7 +41,7 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
- bool processAtomicInsts(void);
+ bool processAtomicInsts();
public:
@@ -151,7 +151,7 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
return false;
}
-bool BPFMIPreEmitChecking::processAtomicInsts(void) {
+bool BPFMIPreEmitChecking::processAtomicInsts() {
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != BPF::XADDW &&
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index 354980e4bf3c..7f69c8a63443 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -56,8 +56,8 @@ private:
bool isInsnFrom32Def(MachineInstr *DefInsn);
bool isPhiFrom32Def(MachineInstr *MovMI);
bool isMovFrom32Def(MachineInstr *MovMI);
- bool eliminateZExtSeq(void);
- bool eliminateZExt(void);
+ bool eliminateZExtSeq();
+ bool eliminateZExt();
std::set<MachineInstr *> PhiInsns;
@@ -172,7 +172,7 @@ bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
return true;
}
-bool BPFMIPeephole::eliminateZExtSeq(void) {
+bool BPFMIPeephole::eliminateZExtSeq() {
MachineInstr* ToErase = nullptr;
bool Eliminated = false;
@@ -240,7 +240,7 @@ bool BPFMIPeephole::eliminateZExtSeq(void) {
return Eliminated;
}
-bool BPFMIPeephole::eliminateZExt(void) {
+bool BPFMIPeephole::eliminateZExt() {
MachineInstr* ToErase = nullptr;
bool Eliminated = false;
@@ -312,7 +312,7 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
- bool eliminateRedundantMov(void);
+ bool eliminateRedundantMov();
public:
@@ -334,7 +334,7 @@ void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
}
-bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
+bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
MachineInstr* ToErase = nullptr;
bool Eliminated = false;
@@ -405,7 +405,7 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
- bool eliminateTruncSeq(void);
+ bool eliminateTruncSeq();
public:
@@ -452,7 +452,7 @@ void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
// are 32-bit registers, but later on, kernel verifier will rewrite
// it with 64-bit value. Therefore, truncating the value after the
// load will result in incorrect code.
-bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
+bool BPFMIPeepholeTruncElim::eliminateTruncSeq() {
MachineInstr* ToErase = nullptr;
bool Eliminated = false;
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 7e829ea43e89..b4232875383c 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -55,7 +55,7 @@ private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
- bool removeLD(void);
+ bool removeLD();
void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB,
MachineInstr &MI, Register &SrcReg, Register &DstReg,
const GlobalValue *GVal, bool IsAma);
diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index 36237b2fc4fd..6dfb7dc39922 100644
--- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -105,10 +105,10 @@ static bool BPFPreserveDITypeImpl(Function &F) {
BasicBlock *BB = Call->getParent();
IntegerType *VarType = Type::getInt64Ty(BB->getContext());
- std::string GVName = BaseName + std::to_string(Count) + "$" +
- std::to_string(Reloc);
+ std::string GVName =
+ BaseName + std::to_string(Count) + "$" + std::to_string(Reloc);
GlobalVariable *GV = new GlobalVariable(
- *M, VarType, false, GlobalVariable::ExternalLinkage, NULL, GVName);
+ *M, VarType, false, GlobalVariable::ExternalLinkage, nullptr, GVName);
GV->addAttribute(BPFCoreSharedInfo::TypeIdAttr);
GV->setMetadata(LLVMContext::MD_preserve_access_index, MD);
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 77e3cd393f87..e4d98b85e58b 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -59,6 +59,6 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(),
+ : BPFGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
FrameLowering(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 0c510686a13b..d536aed1d211 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -1366,7 +1366,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
// Calculate symbol size
const DataLayout &DL = Global.getParent()->getDataLayout();
- uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
+ uint32_t Size = DL.getTypeAllocSize(Global.getValueType());
DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
Asm->getSymbol(&Global), Size);
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
index e0aeec989879..200c72a07ed6 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -50,7 +50,7 @@ static void printExpr(const MCExpr *Expr, raw_ostream &O) {
void BPFInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier) {
- assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
O << getRegisterName(Op.getReg());
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index 29b99a84a6cd..a62bd111cba9 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -303,6 +303,14 @@ public:
bool isRegSeq() const { return isRegSeqTemplate<CSKY::R0, CSKY::R31>(); }
+ bool isRegSeqV1() const {
+ return isRegSeqTemplate<CSKY::F0_32, CSKY::F15_32>();
+ }
+
+ bool isRegSeqV2() const {
+ return isRegSeqTemplate<CSKY::F0_32, CSKY::F31_32>();
+ }
+
static bool isLegalRegList(unsigned from, unsigned to) {
if (from == 0 && to == 0)
return true;
diff --git a/llvm/lib/Target/CSKY/CSKY.h b/llvm/lib/Target/CSKY/CSKY.h
index 357b1e96e606..401d6fa1a0a5 100644
--- a/llvm/lib/Target/CSKY/CSKY.h
+++ b/llvm/lib/Target/CSKY/CSKY.h
@@ -21,6 +21,9 @@ class CSKYTargetMachine;
class FunctionPass;
FunctionPass *createCSKYISelDag(CSKYTargetMachine &TM);
+FunctionPass *createCSKYConstantIslandPass();
+
+void initializeCSKYConstantIslandsPass(PassRegistry &);
} // namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td
index e26781ca6aa1..ddb7fe93706e 100644
--- a/llvm/lib/Target/CSKY/CSKY.td
+++ b/llvm/lib/Target/CSKY/CSKY.td
@@ -11,6 +11,40 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// CSKY subtarget features and instruction predicates.
//===----------------------------------------------------------------------===//
+def ModeHardFloat :
+ SubtargetFeature<"hard-float", "UseHardFloat",
+ "true", "Use hard floating point features">;
+def ModeHardFloatABI :
+ SubtargetFeature<"hard-float-abi", "UseHardFloatABI",
+ "true", "Use hard floating point ABI to pass args">;
+
+def FeatureFPUV2_SF
+ : SubtargetFeature<"fpuv2_sf", "HasFPUv2SingleFloat", "true",
+ "Enable FPUv2 single float instructions">;
+def HasFPUv2_SF : Predicate<"Subtarget->hasFPUv2SingleFloat()">,
+ AssemblerPredicate<(all_of FeatureFPUV2_SF),
+ "Enable FPUv2 single float instructions">;
+
+def FeatureFPUV2_DF
+ : SubtargetFeature<"fpuv2_df", "HasFPUv2DoubleFloat", "true",
+ "Enable FPUv2 double float instructions">;
+def HasFPUv2_DF : Predicate<"Subtarget->hasFPUv2DoubleFloat()">,
+ AssemblerPredicate<(all_of FeatureFPUV2_DF),
+ "Enable FPUv2 double float instructions">;
+
+def FeatureFPUV3_SF
+ : SubtargetFeature<"fpuv3_sf", "HasFPUv3SingleFloat", "true",
+ "Enable FPUv3 single float instructions">;
+def HasFPUv3_SF : Predicate<"Subtarget->hasFPUv3SingleFloat()">,
+ AssemblerPredicate<(all_of FeatureFPUV3_SF),
+ "Enable FPUv3 single float instructions">;
+
+def FeatureFPUV3_DF
+ : SubtargetFeature<"fpuv3_df", "HasFPUv3DoubleFloat", "true",
+ "Enable FPUv3 double float instructions">;
+def HasFPUv3_DF : Predicate<"Subtarget->hasFPUv3DoubleFloat()">,
+ AssemblerPredicate<(all_of FeatureFPUV3_DF),
+ "Enable FPUv3 double float instructions">;
def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true",
"Use the 16-bit btsti instruction">;
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
index 85129f78e726..c8269eeacfdb 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYAsmPrinter.h"
#include "CSKY.h"
+#include "CSKYConstantPoolValue.h"
#include "CSKYTargetMachine.h"
#include "MCTargetDesc/CSKYInstPrinter.h"
#include "MCTargetDesc/CSKYMCExpr.h"
@@ -38,6 +39,7 @@ CSKYAsmPrinter::CSKYAsmPrinter(llvm::TargetMachine &TM,
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this) {}
bool CSKYAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ MCP = MF.getConstantPool();
Subtarget = &MF.getSubtarget<CSKYSubtarget>();
return AsmPrinter::runOnMachineFunction(MF);
}
@@ -56,16 +58,166 @@ void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
// instructions) auto-generated.
#include "CSKYGenMCPseudoLowering.inc"
+void CSKYAsmPrinter::expandTLSLA(const MachineInstr *MI) {
+ const CSKYInstrInfo *TII = Subtarget->getInstrInfo();
+
+ DebugLoc DL = MI->getDebugLoc();
+
+ MCSymbol *PCLabel = OutContext.getOrCreateSymbol(
+ Twine(MAI->getPrivateGlobalPrefix()) + "PC" + Twine(getFunctionNumber()) +
+ "_" + Twine(MI->getOperand(3).getImm()));
+
+ OutStreamer->emitLabel(PCLabel);
+
+ auto Instr = BuildMI(*MF, DL, TII->get(CSKY::LRW32))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(2));
+ MCInst LRWInst;
+ MCInstLowering.Lower(Instr, LRWInst);
+ EmitToStreamer(*OutStreamer, LRWInst);
+
+ Instr = BuildMI(*MF, DL, TII->get(CSKY::GRS32))
+ .add(MI->getOperand(1))
+ .addSym(PCLabel);
+ MCInst GRSInst;
+ MCInstLowering.Lower(Instr, GRSInst);
+ EmitToStreamer(*OutStreamer, GRSInst);
+ return;
+}
+
+void CSKYAsmPrinter::emitCustomConstantPool(const MachineInstr *MI) {
+
+ // This instruction represents a floating constant pool in the function.
+ // The first operand is the ID# for this instruction, the second is the
+ // index into the MachineConstantPool that this is, the third is the size
+ // in bytes of this constant pool entry.
+ // The required alignment is specified on the basic block holding this MI.
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ // If this is the first entry of the pool, mark it.
+ if (!InConstantPool) {
+ OutStreamer->emitValueToAlignment(4);
+ InConstantPool = true;
+ }
+
+ OutStreamer->emitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ emitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ emitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal);
+ return;
+}
+
+void CSKYAsmPrinter::emitFunctionBodyEnd() {
+ // Make sure to terminate any constant pools that were at the end
+ // of the function.
+ if (!InConstantPool)
+ return;
+ InConstantPool = false;
+}
+
void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
+ // If we just ended a constant pool, mark it as such.
+ if (InConstantPool && MI->getOpcode() != CSKY::CONSTPOOL_ENTRY) {
+ InConstantPool = false;
+ }
+
+ if (MI->getOpcode() == CSKY::PseudoTLSLA32)
+ return expandTLSLA(MI);
+
+ if (MI->getOpcode() == CSKY::CONSTPOOL_ENTRY)
+ return emitCustomConstantPool(MI);
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
}
+// Convert a CSKY-specific constant pool modifier into the associated
+// MCSymbolRefExpr variant kind.
+static CSKYMCExpr::VariantKind
+getModifierVariantKind(CSKYCP::CSKYCPModifier Modifier) {
+ switch (Modifier) {
+ case CSKYCP::NO_MOD:
+ return CSKYMCExpr::VK_CSKY_None;
+ case CSKYCP::ADDR:
+ return CSKYMCExpr::VK_CSKY_ADDR;
+ case CSKYCP::GOT:
+ return CSKYMCExpr::VK_CSKY_GOT;
+ case CSKYCP::GOTOFF:
+ return CSKYMCExpr::VK_CSKY_GOTOFF;
+ case CSKYCP::PLT:
+ return CSKYMCExpr::VK_CSKY_PLT;
+ case CSKYCP::TLSGD:
+ return CSKYMCExpr::VK_CSKY_TLSGD;
+ case CSKYCP::TLSLE:
+ return CSKYMCExpr::VK_CSKY_TLSLE;
+ case CSKYCP::TLSIE:
+ return CSKYMCExpr::VK_CSKY_TLSIE;
+ }
+ llvm_unreachable("Invalid CSKYCPModifier!");
+}
+
+void CSKYAsmPrinter::emitMachineConstantPoolValue(
+ MachineConstantPoolValue *MCPV) {
+ int Size = getDataLayout().getTypeAllocSize(MCPV->getType());
+ CSKYConstantPoolValue *CCPV = static_cast<CSKYConstantPoolValue *>(MCPV);
+ MCSymbol *MCSym;
+
+ if (CCPV->isBlockAddress()) {
+ const BlockAddress *BA =
+ cast<CSKYConstantPoolConstant>(CCPV)->getBlockAddress();
+ MCSym = GetBlockAddressSymbol(BA);
+ } else if (CCPV->isGlobalValue()) {
+ const GlobalValue *GV = cast<CSKYConstantPoolConstant>(CCPV)->getGV();
+ MCSym = getSymbol(GV);
+ } else if (CCPV->isMachineBasicBlock()) {
+ const MachineBasicBlock *MBB = cast<CSKYConstantPoolMBB>(CCPV)->getMBB();
+ MCSym = MBB->getSymbol();
+ } else if (CCPV->isJT()) {
+ signed JTI = cast<CSKYConstantPoolJT>(CCPV)->getJTI();
+ MCSym = GetJTISymbol(JTI);
+ } else {
+ assert(CCPV->isExtSymbol() && "unrecognized constant pool value");
+ StringRef Sym = cast<CSKYConstantPoolSymbol>(CCPV)->getSymbol();
+ MCSym = GetExternalSymbolSymbol(Sym);
+ }
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(MCSym, MCSymbolRefExpr::VK_None, OutContext);
+
+ if (CCPV->getPCAdjustment()) {
+
+ MCSymbol *PCLabel = OutContext.getOrCreateSymbol(
+ Twine(MAI->getPrivateGlobalPrefix()) + "PC" +
+ Twine(getFunctionNumber()) + "_" + Twine(CCPV->getLabelID()));
+
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext);
+ if (CCPV->mustAddCurrentAddress()) {
+ // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+ // label, so just emit a temporary local label here and reference that
+ // instead.
+ MCSymbol *DotSym = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext);
+ }
+
+ // Wrap the expression with the constant pool modifier.
+ Expr = CSKYMCExpr::create(Expr, getModifierVariantKind(CCPV->getModifier()),
+ OutContext);
+
+ OutStreamer->emitValue(Expr, Size);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYAsmPrinter() {
RegisterAsmPrinter<CSKYAsmPrinter> X(getTheCSKYTarget());
}
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
index b30311e0ca64..04a253d349c8 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -20,6 +20,15 @@ class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
const CSKYSubtarget *Subtarget;
+ bool InConstantPool = false;
+
+ /// Keep a pointer to the MachineConstantPool of the current
+ /// MachineFunction.
+ MachineConstantPool *MCP;
+
+ void expandTLSLA(const MachineInstr *MI);
+ void emitCustomConstantPool(const MachineInstr *MI);
+
public:
explicit CSKYAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
@@ -33,9 +42,16 @@ public:
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
+ void emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
+
+ void emitFunctionBodyEnd() override;
+
void emitInstruction(const MachineInstr *MI) override;
bool runOnMachineFunction(MachineFunction &MF) override;
+
+ // We emit constant pools ourselves, so disable the default emission.
+ void emitConstantPool() override {}
};
} // end namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
new file mode 100644
index 000000000000..3ac335e2ad9d
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
@@ -0,0 +1,1376 @@
+//===- CSKYConstantIslandPass.cpp - Emit PC Relative loads ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// Loading constants inline is expensive on CSKY, so it is generally better
+// to place the constant nearby in code space, where it can be loaded with a
+// simple 16/32-bit load instruction such as lrw.
+//
+// The constants can be not just numbers but addresses of functions and labels.
+// This can be particularly helpful in static relocation mode for embedded
+// non-linux targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKY.h"
+#include "CSKYConstantPoolValue.h"
+#include "CSKYMachineFunctionInfo.h"
+#include "CSKYSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "CSKY-constant-islands"
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+
+namespace {
+
+using Iter = MachineBasicBlock::iterator;
+using ReverseIter = MachineBasicBlock::reverse_iterator;
+
+/// CSKYConstantIslands - Due to limited PC-relative displacements, CSKY
+/// requires constant pool entries to be scattered among the instructions
+/// inside a function. To do this, it completely ignores the normal LLVM
+/// constant pool; instead, it places constants wherever it feels like with
+/// special instructions.
+///
+/// The terminology used in this pass includes:
+/// Islands - Clumps of constants placed in the function.
+/// Water - Potential places where an island could be formed.
+/// CPE - A constant pool entry that has been placed somewhere, which
+/// tracks a list of users.
+
+class CSKYConstantIslands : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset = 0;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size = 0;
+
+ BasicBlockInfo() = default;
+
+ unsigned postOffset() const { return Offset + Size; }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock *> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock *, 4> NewWaterList;
+
+ using water_iterator = std::vector<MachineBasicBlock *>::iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+
+ private:
+ unsigned MaxDisp;
+
+ public:
+ bool NegOk;
+
+ CPUser(MachineInstr *Mi, MachineInstr *Cpemi, unsigned Maxdisp, bool Neg)
+ : MI(Mi), CPEMI(Cpemi), MaxDisp(Maxdisp), NegOk(Neg) {
+ HighWaterMark = CPEMI->getParent();
+ }
+
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
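+    /// A fixed 16-byte slack is subtracted from the encodable maximum, as a
+    /// conservative margin for alignment padding and size-estimate error.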
+ unsigned getMaxDisp() const { return MaxDisp - 16; }
+
+ void setMaxDisp(unsigned Val) { MaxDisp = Val; }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+
+ CPEntry(MachineInstr *Cpemi, unsigned Cpi, unsigned Rc = 0)
+ : CPEMI(Cpemi), CPI(Cpi), RefCount(Rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry>> CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool IsCond : 1;
+ int UncondBr;
+
+ ImmBranch(MachineInstr *Mi, unsigned Maxdisp, bool Cond, int Ubr)
+ : MI(Mi), MaxDisp(Maxdisp), IsCond(Cond), UncondBr(Ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ const CSKYSubtarget *STI = nullptr;
+ const CSKYInstrInfo *TII;
+ CSKYMachineFunctionInfo *MFI;
+ MachineFunction *MF = nullptr;
+ MachineConstantPool *MCP = nullptr;
+
+ unsigned PICLabelUId;
+
+ void initPICLabelUId(unsigned UId) { PICLabelUId = UId; }
+
+ unsigned createPICLabelUId() { return PICLabelUId++; }
+
+public:
+ static char ID;
+
+ CSKYConstantIslands() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "CSKY Constant Islands"; }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ void doInitialPlacement(std::vector<MachineInstr *> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ Align getCPEAlign(const MachineInstr &CPEMI);
+ void initializeFunctionInfo(const std::vector<MachineInstr *> &CPEMIs);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser &) const;
+ void dumpBBs();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, unsigned Disp,
+ bool NegativeOK);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr *CPEMI);
+ int findInRangeCPEntry(CPUser &U, unsigned UserOffset);
+ bool findAvailableWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, CPUser &U,
+ unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+};
+} // end anonymous namespace
+
+char CSKYConstantIslands::ID = 0;
+
+bool CSKYConstantIslands::isOffsetInRange(unsigned UserOffset,
+ unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset, U.getMaxDisp(), U.NegOk);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// print block size and offset information - debugging
+LLVM_DUMP_METHOD void CSKYConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x %bb.%u\t", BBI.Offset, J)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+}
+#endif
+
+bool CSKYConstantIslands::runOnMachineFunction(MachineFunction &Mf) {
+ MF = &Mf;
+ MCP = Mf.getConstantPool();
+ STI = &static_cast<const CSKYSubtarget &>(Mf.getSubtarget());
+
+ LLVM_DEBUG(dbgs() << "***** CSKYConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlign().value() << " bytes *****\n");
+
+ TII = STI->getInstrInfo();
+ MFI = MF->getInfo<CSKYMachineFunctionInfo>();
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ bool MadeChange = false;
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr *> CPEMIs;
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
+
+ /// The next UID to take is the first unused one.
+ initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ initializeFunctionInfo(CPEMIs);
+ CPEMIs.clear();
+ LLVM_DEBUG(dumpBBs());
+
+ /// Remove dead constant pool entries.
+ MadeChange |= removeUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ (void)NoBRIters;
+ while (true) {
+ LLVM_DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
+ bool CPChange = false;
+ for (unsigned I = 0, E = CPUsers.size(); I != E; ++I)
+ CPChange |= handleConstantPoolUser(I);
+ if (CPChange && ++NoCPIters > 30)
+ report_fatal_error("Constant Island pass failed to converge!");
+ LLVM_DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ LLVM_DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned I = 0, E = ImmBranches.size(); I != E; ++I)
+ BRChange |= fixupImmediateBr(ImmBranches[I]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ LLVM_DEBUG(dumpBBs());
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ LLVM_DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ return MadeChange;
+}
+
+/// doInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void CSKYConstantIslands::doInitialPlacement(
+ std::vector<MachineInstr *> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ const Align MaxAlign = MCP->getConstantPoolAlign();
+
+ // Mark the basic block as required by the const-pool.
+ BB->setAlignment(Align(2));
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->ensureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(Log2(MaxAlign) + 1,
+ BB->end());
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
+
+ const DataLayout &TD = MF->getDataLayout();
+ for (unsigned I = 0, E = CPs.size(); I != E; ++I) {
+ unsigned Size = CPs[I].getSizeInBytes(TD);
+ assert(Size >= 4 && "Too small constant pool entry");
+ Align Alignment = CPs[I].getAlign();
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+    assert(isAligned(Alignment, Size) &&
+           "CP Entry size is not a multiple of its alignment!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2(Alignment);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
+
+ MachineInstr *CPEMI =
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(CSKY::CONSTPOOL_ENTRY))
+ .addImm(I)
+ .addConstantPoolIndex(I)
+ .addImm(Size);
+
+ CPEMIs.push_back(CPEMI);
+
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned A = LogAlign + 1; A <= Log2(MaxAlign); ++A)
+ if (InsPoint[A] == InsAt)
+ InsPoint[A] = CPEMI;
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ CPEntries.emplace_back(1, CPEntry(CPEMI, I));
+ ++NumCPEs;
+ LLVM_DEBUG(dbgs() << "Moved CPI#" << I << " to end of function, size = "
+ << Size << ", align = " << Alignment.value() << '\n');
+ }
+ LLVM_DEBUG(BB->dump());
+}
+
+/// bbHasFallthrough - Return true if the specified basic block can fall
+/// through into the block immediately after it.
+static bool bbHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB->getIterator();
+ // Can't fall off end of function.
+ if (std::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = &*std::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end();
+ I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+CSKYConstantIslands::CPEntry *
+CSKYConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned I = 0, E = CPEs.size(); I != E; ++I) {
+ if (CPEs[I].CPEMI == CPEMI)
+ return &CPEs[I];
+ }
+ return nullptr;
+}
+
+/// getCPEAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI, in bytes.
+Align CSKYConstantIslands::getCPEAlign(const MachineInstr &CPEMI) {
+ assert(CPEMI.getOpcode() == CSKY::CONSTPOOL_ENTRY);
+
+ unsigned CPI = CPEMI.getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ return MCP->getConstants()[CPI].getAlign();
+}
+
+/// initializeFunctionInfo - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void CSKYConstantIslands::initializeFunctionInfo(
+ const std::vector<MachineInstr *> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(&*I);
+
+ // Compute block offsets.
+ adjustBBOffsetsAfter(&MF->front());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineBasicBlock &MBB : *MF) {
+    // If this block doesn't fall through into the next MBB, then this is
+    // 'water' where a constant pool island could be placed.
+ if (!bbHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+
+ int Opc = MI.getOpcode();
+ if (MI.isBranch() && !MI.isIndirectBranch()) {
+ bool IsCond = MI.isConditionalBranch();
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = CSKY::BR32;
+
+ switch (MI.getOpcode()) {
+ case CSKY::BR16:
+ case CSKY::BF16:
+ case CSKY::BT16:
+ Bits = 10;
+ Scale = 2;
+ break;
+ default:
+ Bits = 16;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
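+        // The reachable displacement is a signed immediate of 'Bits' bits
+        // scaled by the instruction granularity, e.g. Bits == 16 and
+        // Scale == 2 gives ((1 << 15) - 1) * 2 = 65534 bytes.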
+ unsigned MaxOffs = ((1 << (Bits - 1)) - 1) * Scale;
+ ImmBranches.push_back(ImmBranch(&MI, MaxOffs, IsCond, UOpc));
+ }
+
+ if (Opc == CSKY::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned Op = 0, E = MI.getNumOperands(); Op != E; ++Op)
+ if (MI.getOperand(Op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
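+          // These operands are skipped: they impose no displacement limit on
+          // island placement (MOVIH32/ORI32 presumably materialize the
+          // address as a hi/lo immediate pair rather than a PC-relative
+          // load).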
+ case CSKY::MOVIH32:
+ case CSKY::ORI32:
+ continue;
+ case CSKY::PseudoTLSLA32:
+ case CSKY::JSRI32:
+ case CSKY::JMPI32:
+ case CSKY::LRW32:
+ case CSKY::LRW32_Gen:
+ Bits = 16;
+ Scale = 4;
+ break;
+ case CSKY::f2FLRW_S:
+ case CSKY::f2FLRW_D:
+ Bits = 8;
+ Scale = 4;
+ break;
+ case CSKY::GRS32:
+ Bits = 17;
+ Scale = 2;
+ NegOk = true;
+ break;
+ }
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = MI.getOperand(Op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
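+          // Unsigned displacement range (unless NegOk), e.g. LRW32 with
+          // Bits == 16 and Scale == 4 reaches ((1 << 16) - 1) * 4 = 262140
+          // bytes from the PC.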
+ unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+ CPUsers.push_back(CPUser(&MI, CPEMI, MaxOffs, NegOk));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+ }
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void CSKYConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+
+ for (const MachineInstr &MI : *MBB)
+ BBI.Size += TII->getInstSizeInBytes(MI);
+}
+
+/// getOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned CSKYConstantIslands::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+ return Offset;
+}
+
+/// compareMbbNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool compareMbbNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void CSKYConstantIslands::updateForInsertedWaterBlock(
+ MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP = llvm::lower_bound(WaterList, NewBB, compareMbbNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+unsigned CSKYConstantIslands::getUserOffset(CPUser &U) const {
+ unsigned UserOffset = getOffsetOf(U.MI);
+
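+  // Conservatively round the offset down to a 4-byte boundary; the
+  // PC-relative base used by the CP-referencing instructions is assumed to
+  // be word-aligned.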
+ UserOffset &= ~3u;
+
+ return UserOffset;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change, and return the newly created block.
+MachineBasicBlock *
+CSKYConstantIslands::splitBlockBeforeInstr(MachineInstr &MI) {
+ MachineBasicBlock *OrigBB = MI.getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = ++OrigBB->getIterator();
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+
+ // TODO: Add support for 16bit instr.
+ BuildMI(OrigBB, DebugLoc(), TII->get(CSKY::BR32)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as updateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP = llvm::lower_bound(WaterList, OrigBB, compareMbbNumbers);
+ MachineBasicBlock *WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(std::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
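+///
+/// For example, with MaxDisp == 1024 and NegativeOK == false, a trial offset
+/// 1000 bytes after the user is in range, while one 4 bytes before it is not.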
+bool CSKYConstantIslands::isOffsetInRange(unsigned UserOffset,
+ unsigned TrialOffset,
+ unsigned MaxDisp, bool NegativeOK) {
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// isWaterInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool CSKYConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock *Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset();
+ unsigned NextBlockOffset;
+ Align NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = ++Water->getIterator();
+ if (NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = Align(4);
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+  // The CPE may be able to hide in the alignment padding before the next
+  // block. It may also cause more padding to be required if it is more
+  // aligned than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += offsetToAlignment(CPEEnd, NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth;
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
+}
+
+/// isCPEntryInRange - Returns true if the distance between specific MI and
+/// specific ConstPool entry instruction can fit in MI's displacement field.
+bool CSKYConstantIslands::isCPEntryInRange(MachineInstr *MI,
+ unsigned UserOffset,
+ MachineInstr *CPEMI,
+ unsigned MaxDisp, bool NegOk,
+ bool DoDump) {
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+
+ if (DoDump) {
+ LLVM_DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset) << " in "
+ << printMBBReference(*MI->getParent()) << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset - UserOffset));
+ });
+ }
+
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// bbIsJumpedOver - Return true if the specified basic block's only
+/// predecessor unconditionally branches to its only successor.
+static bool bbIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == CSKY::BR32 /*TODO: change to 16bit instr. */)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif
+
+void CSKYConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for (unsigned I = BBNum + 1, E = MF->getNumBlockIDs(); I < E; ++I) {
+    // Each block starts where its layout predecessor ends. Offsets are
+    // worst-case estimates, so alignment padding is not modelled here.
+ unsigned Offset = BBInfo[I - 1].Offset + BBInfo[I - 1].Size;
+ BBInfo[I].Offset = Offset;
+ }
+}
+
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+bool CSKYConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ removeDeadCPEMI(CPEMI);
+ CPE->CPEMI = nullptr;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// findInRangeCPEntry - see if the currently referenced CPE is in range;
+/// if not, see if an existing clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int CSKYConstantIslands::findInRangeCPEntry(CPUser &U, unsigned UserOffset) {
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ LLVM_DEBUG(dbgs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned I = 0, E = CPEs.size(); I != E; ++I) {
+ // We already tried this one
+ if (CPEs[I].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[I].CPEMI == nullptr)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[I].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[I].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[I].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned J = 0, E = UserMI->getNumOperands(); J != E; ++J)
+ if (UserMI->getOperand(J).isCPI()) {
+ UserMI->getOperand(J).setIndex(CPEs[I].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[I].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specific unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ unsigned Bits, Scale;
+
+ switch (Opc) {
+ case CSKY::BR16:
+ Bits = 10;
+ Scale = 2;
+ break;
+ case CSKY::BR32:
+ Bits = 16;
+ Scale = 2;
+ break;
+  default:
+    llvm_unreachable("Unknown unconditional branch opcode!");
+ }
+
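+  // Signed displacement: BR32 (Bits == 16, Scale == 2) reaches
+  // ((1 << 15) - 1) * 2 = 65534 bytes; BR16 reaches ((1 << 9) - 1) * 2 = 1022.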
+ unsigned MaxOffs = ((1 << (Bits - 1)) - 1) * Scale;
+ return MaxOffs;
+}
+
+/// findAvailableWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. To ensure that this pass terminates, the
+/// CPE location for a particular CPUser is only allowed to move to a lower
+/// address, so search backward from the end of the list and prefer the first
+/// water that is in range.
+bool CSKYConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
+ --IP) {
+ MachineBasicBlock *WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB)) &&
+ Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ LLVM_DEBUG(dbgs() << "Found water after " << printMBBReference(*WaterBB)
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
+ return true;
+ }
+ if (IP == B)
+ break;
+ }
+ return BestGrowth != ~0u;
+}
+
+/// createNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void CSKYConstantIslands::createNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there.
+ if (bbHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = 4;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = UserBBI.postOffset() + Delta;
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ LLVM_DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB)
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = &*++UserMBB->getIterator();
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+
+ // TODO: Add support for 16bit instr.
+ int UncondBr = CSKY::BR32;
+ auto *NewMI = BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr))
+ .addMBB(NewMBB)
+ .getInstr();
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(
+ ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += TII->getInstSizeInBytes(*NewMI);
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
+
+ // What a big block. Find a place within the block to split it.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then align to
+ // Align which is the largest possible alignment in the function.
+ const Align Align = MF->getAlignment();
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ LLVM_DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+  // The 4 in the following is for the unconditional branch we'll be
+  // inserting. Alignment of the island is handled inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ LLVM_DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << Log2(Align) << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - 8;
+ LLVM_DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset =
+ BaseInsertOffset + 4 + CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex + 1;
+ unsigned NumCPUsers = CPUsers.size();
+ for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+        // Shift insertion point by one unit of alignment so it is within
+        // reach.
+ BaseInsertOffset -= Align.value();
+ EndInsertOffset -= Align.value();
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+
+ NewMBB = splitBlockBeforeInstr(*--MI);
+}
+
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool CSKYConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
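+  // A result of 1 means the current entry (or a clone) is usable as-is; 2
+  // means the user was retargeted to a clone and the old entry was removed,
+  // so offsets changed; 0 means no in-range entry exists and we must place a
+  // new island below.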
+ int result = findInRangeCPEntry(U, UserOffset);
+ if (result == 1)
+ return false;
+ if (result == 2)
+ return true;
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (findAvailableWater(U, UserOffset, IP)) {
+ LLVM_DEBUG(dbgs() << "Found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.erase(WaterBB))
+ NewWaterList.insert(NewIsland);
+
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = &*++WaterBB->getIterator();
+ } else {
+ LLVM_DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
+ IP = llvm::find(WaterList, WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF->insert(NewMBB->getIterator(), NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ updateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = createPICLabelUId();
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(CSKY::CONSTPOOL_ENTRY))
+ .addImm(ID)
+ .addConstantPoolIndex(CPI)
+ .addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPEAlign(*U.CPEMI));
+
+ // Increase the size of the island block to account for the new entry.
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(&*--NewIsland->getIterator());
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned I = 0, E = UserMI->getNumOperands(); I != E; ++I)
+ if (UserMI->getOperand(I).isCPI()) {
+ UserMI->getOperand(I).setIndex(ID);
+ break;
+ }
+
+ LLVM_DEBUG(
+ dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+
+ return true;
+}
+
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void CSKYConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBInfo[CPEBB->getNumber()].Size -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned.
+ CPEBB->setAlignment(Align(4));
+ } else {
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPEAlign(*CPEBB->begin()));
+ }
+
+ adjustBBOffsetsAfter(CPEBB);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!bbIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool CSKYConstantIslands::removeUnusedCPEntries() {
+  bool MadeChange = false;
+ for (unsigned I = 0, E = CPEntries.size(); I != E; ++I) {
+ std::vector<CPEntry> &CPEs = CPEntries[I];
+ for (unsigned J = 0, Ee = CPEs.size(); J != Ee; ++J) {
+ if (CPEs[J].RefCount == 0 && CPEs[J].CPEMI) {
+ removeDeadCPEMI(CPEs[J].CPEMI);
+ CPEs[J].CPEMI = nullptr;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// isBBInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool CSKYConstantIslands::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned BrOffset = getOffsetOf(MI);
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ LLVM_DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB)
+ << " from " << printMBBReference(*MI->getParent())
+ << " max delta=" << MaxDisp << " from " << getOffsetOf(MI)
+ << " to " << DestOffset << " offset "
+ << int(DestOffset - BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset - BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset - DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool CSKYConstantIslands::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(*MI);
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.IsCond)
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
+}
+
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled, then we can use BSR32 to implement a far jump. Otherwise, the
+/// function size was underestimated and we report a fatal error.
+bool CSKYConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ if (!MFI->isLRSpilled())
+ report_fatal_error("underestimated function size");
+
+ // Use BSR to implement far jump.
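+  // BSR32 carries a signed 26-bit, 2-byte-scaled displacement, i.e. a reach
+  // of ((1 << 25) - 1) * 2 = 67108862 bytes.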
+ Br.MaxDisp = ((1 << (26 - 1)) - 1) * 2;
+ MI->setDesc(TII->get(CSKY::BSR32_BR));
+ BBInfo[MBB->getNumber()].Size += 4;
+ adjustBBOffsetsAfter(MBB);
+ ++NumUBrFixed;
+
+ LLVM_DEBUG(dbgs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool CSKYConstantIslands::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(*MI);
+
+ SmallVector<MachineOperand, 4> Cond;
+ Cond.push_back(MachineOperand::CreateImm(MI->getOpcode()));
+ Cond.push_back(MI->getOperand(0));
+ TII->reverseBranchCondition(Cond);
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // bteqz L1
+ // =>
+ // bnez L2
+ // b L1
+ // L2:
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !bbHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
+ BMI->isUnconditionalBranch()) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beqz L1
+ // b L2
+ // =>
+ // bnez L2
+ // b L1
+ MachineBasicBlock *NewDest = TII->getBranchDestBlock(*BMI);
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ LLVM_DEBUG(
+ dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(BMI->getNumExplicitOperands() - 1).setMBB(DestBB);
+ MI->getOperand(MI->getNumExplicitOperands() - 1).setMBB(NewDest);
+
+ MI->setDesc(TII->get(Cond[0].getImm()));
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ splitBlockBeforeInstr(*MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int Delta = TII->getInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size -= Delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+
+ // The conditional successor will be swapped between the BBs after this, so
+ // update CFG.
+ MBB->addSuccessor(DestBB);
+ std::next(MBB->getIterator())->removeSuccessor(DestBB);
+ }
+ MachineBasicBlock *NextBB = &*++MBB->getIterator();
+
+ LLVM_DEBUG(dbgs() << " Insert B to " << printMBBReference(*DestBB)
+ << " also invert condition and change dest. to "
+ << printMBBReference(*NextBB) << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+
+ BuildMI(MBB, DebugLoc(), TII->get(Cond[0].getImm()))
+ .addReg(MI->getOperand(0).getReg())
+ .addMBB(NextBB);
+
+ Br.MI = &MBB->back();
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI);
+ MI->eraseFromParent();
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
+
+/// Returns a pass that places constant pool islands and fixes up branches
+/// whose destinations are out of range.
+FunctionPass *llvm::createCSKYConstantIslandPass() {
+ return new CSKYConstantIslands();
+}
+
+INITIALIZE_PASS(CSKYConstantIslands, DEBUG_TYPE,
+ "CSKY constant island placement and branch shortening pass",
+ false, false)
diff --git a/llvm/lib/Target/CSKY/CSKYConstantPoolValue.cpp b/llvm/lib/Target/CSKY/CSKYConstantPoolValue.cpp
new file mode 100644
index 000000000000..d4c4bb847237
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYConstantPoolValue.cpp
@@ -0,0 +1,216 @@
+//===-- CSKYConstantPoolValue.cpp - CSKY constantpool value ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CSKY specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CSKYConstantPoolValue
+//===----------------------------------------------------------------------===//
+
+CSKYConstantPoolValue::CSKYConstantPoolValue(Type *Ty, CSKYCP::CSKYCPKind Kind,
+ unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier,
+ bool AddCurrentAddress,
+ unsigned ID)
+ : MachineConstantPoolValue(Ty), Kind(Kind), PCAdjust(PCAdjust),
+ Modifier(Modifier), AddCurrentAddress(AddCurrentAddress), LabelId(ID) {}
+
+const char *CSKYConstantPoolValue::getModifierText() const {
+ switch (Modifier) {
+ case CSKYCP::ADDR:
+ return "ADDR";
+ case CSKYCP::GOT:
+ return "GOT";
+ case CSKYCP::GOTOFF:
+ return "GOTOFF";
+ case CSKYCP::PLT:
+ return "PLT";
+ case CSKYCP::TLSIE:
+ return "TLSIE";
+ case CSKYCP::TLSLE:
+ return "TLSLE";
+ case CSKYCP::TLSGD:
+ return "TLSGD";
+ case CSKYCP::NO_MOD:
+ return "";
+ }
+ llvm_unreachable("Unknown modifier!");
+}
+
+int CSKYConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) {
+ llvm_unreachable("Shouldn't be calling this directly!");
+}
+
+void CSKYConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddInteger(LabelId);
+ ID.AddInteger(PCAdjust);
+ ID.AddInteger(Modifier);
+}
+
+void CSKYConstantPoolValue::print(raw_ostream &O) const {
+ if (Modifier)
+ O << "(" << getModifierText() << ")";
+ if (PCAdjust)
+ O << " + " << PCAdjust;
+}
+
+//===----------------------------------------------------------------------===//
+// CSKYConstantPoolConstant
+//===----------------------------------------------------------------------===//
+
+CSKYConstantPoolConstant::CSKYConstantPoolConstant(
+ const Constant *C, CSKYCP::CSKYCPKind Kind, unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier, bool AddCurrentAddress, unsigned ID)
+ : CSKYConstantPoolValue(C->getType(), Kind, PCAdjust, Modifier,
+ AddCurrentAddress, ID),
+ CVal(C) {}
+
+CSKYConstantPoolConstant *CSKYConstantPoolConstant::Create(
+ const Constant *C, CSKYCP::CSKYCPKind Kind, unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier, bool AddCurrentAddress, unsigned ID) {
+ return new CSKYConstantPoolConstant(C, Kind, PCAdjust, Modifier,
+ AddCurrentAddress, ID);
+}
+
+const GlobalValue *CSKYConstantPoolConstant::getGV() const {
+ assert(isa<GlobalValue>(CVal) && "CVal should be GlobalValue");
+ return cast<GlobalValue>(CVal);
+}
+
+const BlockAddress *CSKYConstantPoolConstant::getBlockAddress() const {
+ assert(isa<BlockAddress>(CVal) && "CVal should be BlockAddress");
+ return cast<BlockAddress>(CVal);
+}
+
+int CSKYConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) {
+ return getExistingMachineCPValueImpl<CSKYConstantPoolConstant>(CP, Alignment);
+}
+
+void CSKYConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(CVal);
+
+ CSKYConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void CSKYConstantPoolConstant::print(raw_ostream &O) const {
+ O << CVal->getName();
+ CSKYConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// CSKYConstantPoolSymbol
+//===----------------------------------------------------------------------===//
+
+CSKYConstantPoolSymbol::CSKYConstantPoolSymbol(Type *Ty, const char *S,
+ unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier,
+ bool AddCurrentAddress)
+ : CSKYConstantPoolValue(Ty, CSKYCP::CPExtSymbol, PCAdjust, Modifier,
+ AddCurrentAddress),
+      S(S) {}
+
+CSKYConstantPoolSymbol *
+CSKYConstantPoolSymbol::Create(Type *Ty, const char *S, unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier) {
+ return new CSKYConstantPoolSymbol(Ty, S, PCAdjust, Modifier, false);
+}
+
+int CSKYConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) {
+
+ return getExistingMachineCPValueImpl<CSKYConstantPoolSymbol>(CP, Alignment);
+}
+
+void CSKYConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddString(S);
+ CSKYConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void CSKYConstantPoolSymbol::print(raw_ostream &O) const {
+ O << S;
+ CSKYConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// CSKYConstantPoolMBB
+//===----------------------------------------------------------------------===//
+
+CSKYConstantPoolMBB::CSKYConstantPoolMBB(Type *Ty, const MachineBasicBlock *Mbb,
+ unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier,
+ bool AddCurrentAddress)
+ : CSKYConstantPoolValue(Ty, CSKYCP::CPMachineBasicBlock, PCAdjust, Modifier,
+ AddCurrentAddress),
+ MBB(Mbb) {}
+
+CSKYConstantPoolMBB *CSKYConstantPoolMBB::Create(Type *Ty,
+ const MachineBasicBlock *Mbb,
+ unsigned PCAdjust) {
+ return new CSKYConstantPoolMBB(Ty, Mbb, PCAdjust, CSKYCP::ADDR, false);
+}
+
+int CSKYConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) {
+ return getExistingMachineCPValueImpl<CSKYConstantPoolMBB>(CP, Alignment);
+}
+
+void CSKYConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(MBB);
+ CSKYConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void CSKYConstantPoolMBB::print(raw_ostream &O) const {
+ O << "BB#" << MBB->getNumber();
+ CSKYConstantPoolValue::print(O);
+}
+
+//===----------------------------------------------------------------------===//
+// CSKYConstantPoolJT
+//===----------------------------------------------------------------------===//
+
+CSKYConstantPoolJT::CSKYConstantPoolJT(Type *Ty, int JTIndex, unsigned PCAdj,
+ CSKYCP::CSKYCPModifier Modifier,
+ bool AddCurrentAddress)
+ : CSKYConstantPoolValue(Ty, CSKYCP::CPJT, PCAdj, Modifier,
+ AddCurrentAddress),
+ JTI(JTIndex) {}
+
+CSKYConstantPoolJT *
+CSKYConstantPoolJT::Create(Type *Ty, int JTI, unsigned PCAdj,
+ CSKYCP::CSKYCPModifier Modifier) {
+ return new CSKYConstantPoolJT(Ty, JTI, PCAdj, Modifier, false);
+}
+
+int CSKYConstantPoolJT::getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) {
+ return getExistingMachineCPValueImpl<CSKYConstantPoolJT>(CP, Alignment);
+}
+
+void CSKYConstantPoolJT::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddInteger(JTI);
+ CSKYConstantPoolValue::addSelectionDAGCSEId(ID);
+}
+
+void CSKYConstantPoolJT::print(raw_ostream &O) const {
+ O << "JTI#" << JTI;
+ CSKYConstantPoolValue::print(O);
+}
diff --git a/llvm/lib/Target/CSKY/CSKYConstantPoolValue.h b/llvm/lib/Target/CSKY/CSKYConstantPoolValue.h
new file mode 100644
index 000000000000..2eff9404a34c
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYConstantPoolValue.h
@@ -0,0 +1,221 @@
+//===-- CSKYConstantPoolValue.h - CSKY constantpool value -----*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CSKY specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_CSKY_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_CSKY_CONSTANTPOOLVALUE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+
+namespace llvm {
+
+class BlockAddress;
+class Constant;
+class GlobalValue;
+class LLVMContext;
+class MachineBasicBlock;
+
+namespace CSKYCP {
+enum CSKYCPKind {
+ CPValue,
+ CPExtSymbol,
+ CPBlockAddress,
+ CPMachineBasicBlock,
+ CPJT
+};
+
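+// Relocation-style modifiers applied when lowering a constant pool value,
+// e.g. GOT/PLT indirection or one of the TLS access models.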
+enum CSKYCPModifier { NO_MOD, ADDR, GOT, GOTOFF, PLT, TLSLE, TLSIE, TLSGD };
+} // namespace CSKYCP
+
+/// CSKYConstantPoolValue - CSKY specific constantpool value. This is used to
+/// represent PC-relative displacement between the address of the load
+/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)).
+class CSKYConstantPoolValue : public MachineConstantPoolValue {
+protected:
+ CSKYCP::CSKYCPKind Kind; // Kind of constant.
+ unsigned PCAdjust; // Extra adjustment if constantpool is pc-relative.
+ CSKYCP::CSKYCPModifier Modifier; // GV modifier
+ bool AddCurrentAddress;
+
+ unsigned LabelId = 0;
+
+ CSKYConstantPoolValue(Type *Ty, CSKYCP::CSKYCPKind Kind, unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier, bool AddCurrentAddress,
+ unsigned ID = 0);
+
+public:
+ const char *getModifierText() const;
+ unsigned getPCAdjustment() const { return PCAdjust; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ CSKYCP::CSKYCPModifier getModifier() const { return Modifier; }
+ unsigned getLabelID() const { return LabelId; }
+
+ bool isGlobalValue() const { return Kind == CSKYCP::CPValue; }
+ bool isExtSymbol() const { return Kind == CSKYCP::CPExtSymbol; }
+ bool isBlockAddress() const { return Kind == CSKYCP::CPBlockAddress; }
+ bool isMachineBasicBlock() const {
+ return Kind == CSKYCP::CPMachineBasicBlock;
+ }
+ bool isJT() const { return Kind == CSKYCP::CPJT; }
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) override;
+
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+
+ void print(raw_ostream &O) const override;
+
+ bool equals(const CSKYConstantPoolValue *A) const {
+ return this->LabelId == A->LabelId && this->PCAdjust == A->PCAdjust &&
+ this->Modifier == A->Modifier;
+ }
+
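+  /// Search the machine constant pool for an existing entry of the derived
+  /// type that equals this value and is at least as aligned as requested;
+  /// return its index, or -1 so the caller creates a new entry.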
+ template <typename Derived>
+ int getExistingMachineCPValueImpl(MachineConstantPool *CP, Align Alignment) {
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ Constants[i].getAlign() >= Alignment) {
+ auto *CPV =
+ static_cast<CSKYConstantPoolValue *>(Constants[i].Val.MachineCPVal);
+ if (Derived *APC = dyn_cast<Derived>(CPV))
+ if (cast<Derived>(this)->equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+ }
+};
+
+/// CSKY-specific constant pool values for global values (including
+/// functions) and block addresses.
+class CSKYConstantPoolConstant : public CSKYConstantPoolValue {
+ const Constant *CVal; // Constant being loaded.
+
+ CSKYConstantPoolConstant(const Constant *C, CSKYCP::CSKYCPKind Kind,
+ unsigned PCAdjust, CSKYCP::CSKYCPModifier Modifier,
+ bool AddCurrentAddress, unsigned ID);
+
+public:
+ static CSKYConstantPoolConstant *
+ Create(const Constant *C, CSKYCP::CSKYCPKind Kind, unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier, bool AddCurrentAddress,
+ unsigned ID = 0);
+ const GlobalValue *getGV() const;
+ const BlockAddress *getBlockAddress() const;
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) override;
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+ void print(raw_ostream &O) const override;
+
+ bool equals(const CSKYConstantPoolConstant *A) const {
+ return CVal == A->CVal && CSKYConstantPoolValue::equals(A);
+ }
+
+ static bool classof(const CSKYConstantPoolValue *APV) {
+ return APV->isGlobalValue() || APV->isBlockAddress();
+ }
+};
+
+/// CSKYConstantPoolSymbol - CSKY-specific constantpool values for external
+/// symbols.
+class CSKYConstantPoolSymbol : public CSKYConstantPoolValue {
+ const std::string S; // ExtSymbol being loaded.
+
+ CSKYConstantPoolSymbol(Type *Ty, const char *S, unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier,
+ bool AddCurrentAddress);
+
+public:
+ static CSKYConstantPoolSymbol *Create(Type *Ty, const char *S,
+ unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier);
+
+ StringRef getSymbol() const { return S; }
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) override;
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+ void print(raw_ostream &O) const override;
+
+ bool equals(const CSKYConstantPoolSymbol *A) const {
+ return S == A->S && CSKYConstantPoolValue::equals(A);
+ }
+
+ static bool classof(const CSKYConstantPoolValue *ACPV) {
+ return ACPV->isExtSymbol();
+ }
+};
+
+/// CSKYConstantPoolMBB - CSKY-specific constantpool value of a machine basic
+/// block.
+class CSKYConstantPoolMBB : public CSKYConstantPoolValue {
+ const MachineBasicBlock *MBB; // Machine basic block.
+
+ CSKYConstantPoolMBB(Type *Ty, const MachineBasicBlock *Mbb, unsigned PCAdjust,
+ CSKYCP::CSKYCPModifier Modifier, bool AddCurrentAddress);
+
+public:
+ static CSKYConstantPoolMBB *Create(Type *Ty, const MachineBasicBlock *Mbb,
+ unsigned PCAdjust);
+
+ const MachineBasicBlock *getMBB() const { return MBB; }
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) override;
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+ void print(raw_ostream &O) const override;
+
+ bool equals(const CSKYConstantPoolMBB *A) const {
+ return MBB == A->MBB && CSKYConstantPoolValue::equals(A);
+ }
+
+ static bool classof(const CSKYConstantPoolValue *ACPV) {
+ return ACPV->isMachineBasicBlock();
+ }
+};
+
+/// CSKY-specific constantpool value of a jump table.
+class CSKYConstantPoolJT : public CSKYConstantPoolValue {
+  int JTI; // Jump table index.
+
+ CSKYConstantPoolJT(Type *Ty, int JTIndex, unsigned PCAdj,
+ CSKYCP::CSKYCPModifier Modifier, bool AddCurrentAddress);
+
+public:
+ static CSKYConstantPoolJT *Create(Type *Ty, int JTI, unsigned PCAdj,
+ CSKYCP::CSKYCPModifier Modifier);
+
+  int getJTI() const { return JTI; }
+
+ int getExistingMachineCPValue(MachineConstantPool *CP,
+ Align Alignment) override;
+ void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
+ void print(raw_ostream &O) const override;
+
+ bool equals(const CSKYConstantPoolJT *A) const {
+ return JTI == A->JTI && CSKYConstantPoolValue::equals(A);
+ }
+
+ static bool classof(const CSKYConstantPoolValue *ACPV) {
+ return ACPV->isJT();
+ }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
index 3a8ee5713584..3bf001c2cee7 100644
--- a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYFrameLowering.h"
+#include "CSKYMachineFunctionInfo.h"
#include "CSKYSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -46,12 +47,555 @@ bool CSKYFrameLowering::hasBP(const MachineFunction &MF) const {
return MFI.hasVarSizedObjects();
}
+// Determines the size of the frame and maximum call frame size.
+void CSKYFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const CSKYRegisterInfo *RI = STI.getRegisterInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t FrameSize = MFI.getStackSize();
+
+ // Get the alignment.
+ Align StackAlign = getStackAlign();
+ if (RI->hasStackRealignment(MF)) {
+ Align MaxStackAlign = std::max(StackAlign, MFI.getMaxAlign());
+ FrameSize += (MaxStackAlign.value() - StackAlign.value());
+ StackAlign = MaxStackAlign;
+ }
+
+ // Set Max Call Frame Size
+ uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign);
+ MFI.setMaxCallFrameSize(MaxCallSize);
+
+ // Make sure the frame is aligned.
+ FrameSize = alignTo(FrameSize, StackAlign);
+
+ // Update frame info.
+ MFI.setStackSize(FrameSize);
+}
+
void CSKYFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- // FIXME: Implement this when we have function calls
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const CSKYRegisterInfo *RI = STI.getRegisterInfo();
+ const CSKYInstrInfo *TII = STI.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ Register FPReg = getFPReg(STI);
+ Register SPReg = CSKY::R14;
+ Register BPReg = getBPReg(STI);
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
+
+ if (MF.getFunction().hasFnAttribute("interrupt"))
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::NIE));
+
+ // Determine the correct frame layout
+ determineFrameLayout(MF);
+
+ // FIXME (note copied from Lanai): This appears to be overallocating. Needs
+ // investigation. Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI.getStackSize();
+
+ // Early exit if there is no need to allocate on the stack
+ if (StackSize == 0 && !MFI.adjustsStack())
+ return;
+
+ const auto &CSI = MFI.getCalleeSavedInfo();
+
+ unsigned spillAreaSize = CFI->getCalleeSaveAreaSize();
+
+ uint64_t ActualSize = spillAreaSize + CFI->getVarArgsSaveSize();
+
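+  // The stack is allocated in two steps: first reserve just the callee-save
+  // and vararg save area so that the register spills below use small SP
+  // offsets, then allocate the remainder of the frame once the frame pointer
+  // has been established.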
+ // First part stack allocation.
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg, -(static_cast<int64_t>(ActualSize)),
+ MachineInstr::NoFlags);
+
+ // Emit ".cfi_def_cfa_offset FirstSPAdjustAmount"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, ActualSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // The frame pointer is callee-saved, and code has been generated for us to
+ // save it to the stack. We need to skip over the storing of callee-saved
+ // registers as the frame pointer must be modified after it has been saved
+ // to the stack, not before.
+ // FIXME: assumes exactly one instruction is used to save each callee-saved
+ // register.
+ std::advance(MBBI, CSI.size());
+
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ for (const auto &Entry : CSI) {
+ int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
+ Register Reg = Entry.getReg();
+
+ unsigned Num = TRI->getRegSizeInBits(Reg, MRI) / 32;
+ for (unsigned i = 0; i < Num; i++) {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, RI->getDwarfRegNum(Reg, true) + i, Offset + i * 4));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+
+ // Generate new FP.
+ if (hasFP(MF)) {
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), FPReg)
+ .addReg(SPReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Emit ".cfi_def_cfa_register $fp"
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ nullptr, RI->getDwarfRegNum(FPReg, true)));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Second part stack allocation.
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ -(static_cast<int64_t>(StackSize - ActualSize)),
+ MachineInstr::NoFlags);
+
+ // Realign Stack
+ const CSKYRegisterInfo *RI = STI.getRegisterInfo();
+ if (RI->hasStackRealignment(MF)) {
+ Align MaxAlignment = MFI.getMaxAlign();
+
+ const CSKYInstrInfo *TII = STI.getInstrInfo();
+ if (STI.hasE2() && isUInt<12>(~(-(int)MaxAlignment.value()))) {
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::ANDNI32), SPReg)
+ .addReg(SPReg)
+ .addImm(~(-(int)MaxAlignment.value()));
+ } else {
+ unsigned ShiftAmount = Log2(MaxAlignment);
+
+ if (STI.hasE2()) {
+ Register VR =
+ MF.getRegInfo().createVirtualRegister(&CSKY::GPRRegClass);
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::LSRI32), VR)
+ .addReg(SPReg)
+ .addImm(ShiftAmount);
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::LSLI32), SPReg)
+ .addReg(VR)
+ .addImm(ShiftAmount);
+ } else {
+ Register VR =
+ MF.getRegInfo().createVirtualRegister(&CSKY::mGPRRegClass);
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::MOV16), VR).addReg(SPReg);
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::LSRI16), VR)
+ .addReg(VR)
+ .addImm(ShiftAmount);
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::LSLI16), VR)
+ .addReg(VR)
+ .addImm(ShiftAmount);
+ BuildMI(MBB, MBBI, DL, TII->get(CSKY::MOV16), SPReg).addReg(VR);
+ }
+ }
+ }
+
+ // FP will be used to restore the frame in the epilogue, so we need
+ // another base register BP to record SP after re-alignment. SP will
+ // track the current stack after allocating variable sized objects.
+ if (hasBP(MF)) {
+ // move BP, SP
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), BPReg).addReg(SPReg);
+ }
+
+ } else {
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ -(static_cast<int64_t>(StackSize - ActualSize)),
+ MachineInstr::NoFlags);
+ // Emit ".cfi_def_cfa_offset StackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
}
void CSKYFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- // FIXME: Implement this when we have function calls
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ Register FPReg = getFPReg(STI);
+ Register SPReg = CSKY::R14;
+
+ // Get the insert location for the epilogue. If there were no terminators in
+ // the block, get the last instruction.
+ MachineBasicBlock::iterator MBBI = MBB.end();
+ DebugLoc DL;
+ if (!MBB.empty()) {
+ MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.end())
+ MBBI = MBB.getLastNonDebugInstr();
+ DL = MBBI->getDebugLoc();
+
+ // If this is not a terminator, the actual insert location should be after
+ // the last instruction.
+ if (!MBBI->isTerminator())
+ MBBI = std::next(MBBI);
+ }
+
+ const auto &CSI = MFI.getCalleeSavedInfo();
+ uint64_t StackSize = MFI.getStackSize();
+
+ uint64_t ActualSize =
+ CFI->getCalleeSaveAreaSize() + CFI->getVarArgsSaveSize();
+
+ // Skip to before the restores of callee-saved registers
+ // FIXME: assumes exactly one instruction is used to restore each
+ // callee-saved register.
+ auto LastFrameDestroy = MBBI;
+ if (!CSI.empty())
+ LastFrameDestroy = std::prev(MBBI, CSI.size());
+
+ if (hasFP(MF)) {
+ const CSKYInstrInfo *TII = STI.getInstrInfo();
+ BuildMI(MBB, LastFrameDestroy, DL, TII->get(TargetOpcode::COPY), SPReg)
+ .addReg(FPReg)
+ .setMIFlag(MachineInstr::NoFlags);
+ } else {
+ adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, (StackSize - ActualSize),
+ MachineInstr::FrameDestroy);
+ }
+
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg, ActualSize,
+ MachineInstr::FrameDestroy);
+}
+
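+// Conservatively estimate the smallest stack offset that the frame-index
+// users in this function can still encode directly, based on the immediate
+// range of each instruction or addressing mode. determineCalleeSaves uses the
+// result to decide whether an emergency spill slot for the register scavenger
+// is required.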
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const CSKYSubtarget &STI) {
+ unsigned Limit = (1 << 12) - 1;
+
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (MI.isDebugInstr())
+ continue;
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
+
+ if (MI.getOpcode() == CSKY::SPILL_CARRY ||
+ MI.getOpcode() == CSKY::RESTORE_CARRY ||
+ MI.getOpcode() == CSKY::STORE_PAIR ||
+ MI.getOpcode() == CSKY::LOAD_PAIR) {
+ Limit = std::min(Limit, ((1U << 12) - 1) * 4);
+ break;
+ }
+
+ if (MI.getOpcode() == CSKY::ADDI32) {
+ Limit = std::min(Limit, (1U << 12));
+ break;
+ }
+
+ if (MI.getOpcode() == CSKY::ADDI16XZ) {
+ Limit = std::min(Limit, (1U << 3));
+ break;
+ }
+
+        // ADDI16 will not require an extra register;
+        // it can reuse the destination.
+ if (MI.getOpcode() == CSKY::ADDI16)
+ break;
+
+ // Otherwise check the addressing mode.
+ switch (MI.getDesc().TSFlags & CSKYII::AddrModeMask) {
+ default:
+ LLVM_DEBUG(MI.dump());
+ llvm_unreachable(
+ "Unhandled addressing mode in stack size limit calculation");
+ case CSKYII::AddrMode32B:
+ Limit = std::min(Limit, (1U << 12) - 1);
+ break;
+ case CSKYII::AddrMode32H:
+ Limit = std::min(Limit, ((1U << 12) - 1) * 2);
+ break;
+ case CSKYII::AddrMode32WD:
+ Limit = std::min(Limit, ((1U << 12) - 1) * 4);
+ break;
+ case CSKYII::AddrMode16B:
+ Limit = std::min(Limit, (1U << 5) - 1);
+ break;
+ case CSKYII::AddrMode16H:
+ Limit = std::min(Limit, ((1U << 5) - 1) * 2);
+ break;
+ case CSKYII::AddrMode16W:
+ Limit = std::min(Limit, ((1U << 5) - 1) * 4);
+ break;
+ case CSKYII::AddrMode32SDF:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void CSKYFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ if (hasFP(MF))
+ SavedRegs.set(CSKY::R8);
+
+ // Mark BP as used if function has dedicated base pointer.
+ if (hasBP(MF))
+ SavedRegs.set(CSKY::R7);
+
+  // If this is an interrupt handler that makes calls, unconditionally save
+  // all caller-saved registers and all FP registers, regardless of whether
+  // they are used.
+ if (MF.getFunction().hasFnAttribute("interrupt") && MFI.hasCalls()) {
+
+ static const MCPhysReg CSRegs[] = {CSKY::R0, CSKY::R1, CSKY::R2, CSKY::R3,
+ CSKY::R12, CSKY::R13, 0};
+
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ SavedRegs.set(CSRegs[i]);
+
+ if (STI.hasHighRegisters()) {
+
+ static const MCPhysReg CSHRegs[] = {CSKY::R18, CSKY::R19, CSKY::R20,
+ CSKY::R21, CSKY::R22, CSKY::R23,
+ CSKY::R24, CSKY::R25, 0};
+
+ for (unsigned i = 0; CSHRegs[i]; ++i)
+ SavedRegs.set(CSHRegs[i]);
+ }
+
+ static const MCPhysReg CSF32Regs[] = {
+ CSKY::F8_32, CSKY::F9_32, CSKY::F10_32,
+ CSKY::F11_32, CSKY::F12_32, CSKY::F13_32,
+ CSKY::F14_32, CSKY::F15_32, 0};
+ static const MCPhysReg CSF64Regs[] = {
+ CSKY::F8_64, CSKY::F9_64, CSKY::F10_64,
+ CSKY::F11_64, CSKY::F12_64, CSKY::F13_64,
+ CSKY::F14_64, CSKY::F15_64, 0};
+
+  const MCPhysReg *FRegs = nullptr;
+ if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat())
+ FRegs = CSF64Regs;
+ else if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat())
+ FRegs = CSF32Regs;
+
+  if (FRegs != nullptr) {
+ const MCPhysReg *Regs = MF.getRegInfo().getCalleeSavedRegs();
+
+ for (unsigned i = 0; Regs[i]; ++i)
+ if (CSKY::FPR32RegClass.contains(Regs[i]) ||
+ CSKY::FPR64RegClass.contains(Regs[i])) {
+ unsigned x = 0;
+ for (; FRegs[x]; ++x)
+ if (FRegs[x] == Regs[i])
+ break;
+ if (FRegs[x] == 0)
+ SavedRegs.set(Regs[i]);
+ }
+ }
+ }
+
+ CFI->setLRIsSpilled(SavedRegs.test(CSKY::R15));
+
+ unsigned CSStackSize = 0;
+ for (unsigned Reg : SavedRegs.set_bits()) {
+ auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
+ CSStackSize += RegSize;
+ }
+
+ CFI->setCalleeSaveAreaSize(CSStackSize);
+
+ uint64_t Limit = estimateRSStackSizeLimit(MF, STI);
+
+ bool BigFrame = (MFI.estimateStackSize(MF) + CSStackSize >= Limit);
+
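+  // If frame offsets may exceed what the available immediates can encode
+  // (large estimated frame, a spilled carry flag, or no E2 instructions),
+  // reserve an emergency spill slot so the register scavenger can always free
+  // up a GPR to materialise addresses.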
+ if (BigFrame || CFI->isCRSpilled() || !STI.hasE2()) {
+    const TargetRegisterClass *RC = &CSKY::GPRRegClass;
+    unsigned Size = TRI->getSpillSize(*RC);
+    Align Alignment = TRI->getSpillAlign(*RC);
+
+    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
+ }
+}
+
+// Do not reserve stack space within the prologue for outgoing variables when
+// the function contains variable-sized objects; instead, let
+// eliminateCallFramePseudoInstr reserve stack space for them.
+bool CSKYFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo().hasVarSizedObjects();
+}
+
+bool CSKYFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ DebugLoc DL;
+ if (MI != MBB.end() && !MI->isDebugInstr())
+ DL = MI->getDebugLoc();
+
+ for (auto &CS : CSI) {
+ // Insert the spill to the stack frame.
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI);
+ }
+
+ return true;
+}
+
+bool CSKYFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
+ DebugLoc DL;
+ if (MI != MBB.end() && !MI->isDebugInstr())
+ DL = MI->getDebugLoc();
+
+ for (auto &CS : reverse(CSI)) {
+ Register Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
+ assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
+ }
+
+ return true;
+}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
+MachineBasicBlock::iterator CSKYFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ Register SPReg = CSKY::R14;
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (!hasReservedCallFrame(MF)) {
+ // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
+ // ADJCALLSTACKUP must be converted to instructions manipulating the stack
+ // pointer. This is necessary when there is a variable length stack
+ // allocation (e.g. alloca), which means it's not possible to allocate
+ // space for outgoing arguments from within the function prologue.
+ int64_t Amount = MI->getOperand(0).getImm();
+
+ if (Amount != 0) {
+ // Ensure the stack remains aligned after adjustment.
+ Amount = alignSPAdjust(Amount);
+
+ if (MI->getOpcode() == CSKY::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ adjustReg(MBB, MI, DL, SPReg, SPReg, Amount, MachineInstr::NoFlags);
+ }
+ }
+
+ return MBB.erase(MI);
+}
+
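+// Add the (possibly negative) immediate Val to SrcReg and place the result in
+// DestReg: a single ADDI32/SUBI32 when E2 is available and the magnitude fits
+// the 12-bit immediate form, the 16-bit SP-relative ADDI16SPSP/SUBI16SPSP on
+// cores without E2 when the value fits, and otherwise a scratch register
+// loaded with the constant followed by a register add/subtract.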
+void CSKYFrameLowering::adjustReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register DestReg,
+ Register SrcReg, int64_t Val,
+ MachineInstr::MIFlag Flag) const {
+ const CSKYInstrInfo *TII = STI.getInstrInfo();
+
+ if (DestReg == SrcReg && Val == 0)
+ return;
+
+  // TODO: Add support for 16-bit instructions with immediate operands.
+ if (STI.hasE2() && isUInt<12>(std::abs(Val) - 1)) {
+ BuildMI(MBB, MBBI, DL, TII->get(Val < 0 ? CSKY::SUBI32 : CSKY::ADDI32),
+ DestReg)
+ .addReg(SrcReg)
+ .addImm(std::abs(Val))
+ .setMIFlag(Flag);
+ } else if (!STI.hasE2() && isShiftedUInt<7, 2>(std::abs(Val))) {
+ BuildMI(MBB, MBBI, DL,
+ TII->get(Val < 0 ? CSKY::SUBI16SPSP : CSKY::ADDI16SPSP), CSKY::R14)
+ .addReg(CSKY::R14, RegState::Kill)
+ .addImm(std::abs(Val))
+ .setMIFlag(Flag);
+ } else {
+
+ unsigned Op = 0;
+
+ if (STI.hasE2()) {
+ Op = Val < 0 ? CSKY::SUBU32 : CSKY::ADDU32;
+ } else {
+ assert(SrcReg == DestReg);
+ Op = Val < 0 ? CSKY::SUBU16XZ : CSKY::ADDU16XZ;
+ }
+
+ Register ScratchReg = TII->movImm(MBB, MBBI, DL, std::abs(Val), Flag);
+
+ BuildMI(MBB, MBBI, DL, TII->get(Op), DestReg)
+ .addReg(SrcReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .setMIFlag(Flag);
+ }
+}
+
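+// Resolve a frame index to a base register and offset. Callee-saved spill
+// slots are always addressed relative to SP; with stack realignment,
+// non-fixed objects are addressed from SP (or BP when variable-sized objects
+// exist) and fixed objects from FP; otherwise fixed objects use FP when one
+// is kept and everything else falls back to SP or BP.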
+StackOffset
+CSKYFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
+ const CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ const auto &CSI = MFI.getCalleeSavedInfo();
+
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ int Offset = MFI.getObjectOffset(FI) + MFI.getOffsetAdjustment();
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ if (FI >= MinCSFI && FI <= MaxCSFI) {
+ FrameReg = CSKY::R14;
+ Offset += CFI->getVarArgsSaveSize() + CFI->getCalleeSaveAreaSize();
+ } else if (RI->hasStackRealignment(MF)) {
+ assert(hasFP(MF));
+ if (!MFI.isFixedObjectIndex(FI)) {
+ FrameReg = hasBP(MF) ? getBPReg(STI) : CSKY::R14;
+ Offset += MFI.getStackSize();
+ } else {
+ FrameReg = getFPReg(STI);
+ Offset += CFI->getVarArgsSaveSize() + CFI->getCalleeSaveAreaSize();
+ }
+ } else {
+ if (MFI.isFixedObjectIndex(FI) && hasFP(MF)) {
+ FrameReg = getFPReg(STI);
+ Offset += CFI->getVarArgsSaveSize() + CFI->getCalleeSaveAreaSize();
+ } else {
+ FrameReg = hasBP(MF) ? getBPReg(STI) : CSKY::R14;
+ Offset += MFI.getStackSize();
+ }
+ }
+
+ return StackOffset::getFixed(Offset);
}
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.h b/llvm/lib/Target/CSKY/CSKYFrameLowering.h
index 49921a1866bc..69bf01cf1801 100644
--- a/llvm/lib/Target/CSKY/CSKYFrameLowering.h
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.h
@@ -21,6 +21,11 @@ class CSKYSubtarget;
class CSKYFrameLowering : public TargetFrameLowering {
const CSKYSubtarget &STI;
+ void determineFrameLayout(MachineFunction &MF) const;
+ void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register DestReg, Register SrcReg,
+ int64_t Val, MachineInstr::MIFlag Flag) const;
+
public:
explicit CSKYFrameLowering(const CSKYSubtarget &STI)
: TargetFrameLowering(StackGrowsDown,
@@ -31,8 +36,39 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const override;
+
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+
+ bool assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override {
+
+ std::reverse(CSI.begin(), CSI.end());
+
+ return false;
+ }
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+ bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ MutableArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+
bool hasFP(const MachineFunction &MF) const override;
bool hasBP(const MachineFunction &MF) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
index 8dc91904b8cc..d58f9095aa0d 100644
--- a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -68,6 +68,24 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
case ISD::SUBCARRY:
IsSelected = selectSubCarry(N);
break;
+ case ISD::GLOBAL_OFFSET_TABLE: {
+ Register GP = Subtarget->getInstrInfo()->getGlobalBaseReg(*MF);
+ ReplaceNode(N, CurDAG->getRegister(GP, N->getValueType(0)).getNode());
+
+ IsSelected = true;
+ break;
+ }
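+  // A bare frame index is selected as "ADDI <reg>, <fi>, 0"; the frame-index
+  // operand is rewritten into an SP/FP-relative address during frame-index
+  // elimination.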
+ case ISD::FrameIndex: {
+ SDValue Imm = CurDAG->getTargetConstant(0, Dl, MVT::i32);
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ ReplaceNode(N, CurDAG->getMachineNode(Subtarget->hasE2() ? CSKY::ADDI32
+ : CSKY::ADDI16XZ,
+ Dl, MVT::i32, TFI, Imm));
+
+ IsSelected = true;
+ break;
+ }
}
if (IsSelected)
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index a1f7cc685d4c..0b589e3d3e4f 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -13,6 +13,7 @@
#include "CSKYISelLowering.h"
#include "CSKYCallingConv.h"
+#include "CSKYConstantPoolValue.h"
#include "CSKYMachineFunctionInfo.h"
#include "CSKYRegisterInfo.h"
#include "CSKYSubtarget.h"
@@ -37,6 +38,18 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
// Register Class
addRegisterClass(MVT::i32, &CSKY::GPRRegClass);
+ if (STI.useHardFloat()) {
+ if (STI.hasFPUv2SingleFloat())
+ addRegisterClass(MVT::f32, &CSKY::sFPR32RegClass);
+ else if (STI.hasFPUv3SingleFloat())
+ addRegisterClass(MVT::f32, &CSKY::FPR32RegClass);
+
+ if (STI.hasFPUv2DoubleFloat())
+ addRegisterClass(MVT::f64, &CSKY::sFPR64RegClass);
+ else if (STI.hasFPUv3DoubleFloat())
+ addRegisterClass(MVT::f64, &CSKY::FPR64RegClass);
+ }
+
setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@@ -53,16 +66,29 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::i32, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, MVT::i1, Promote);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
if (!Subtarget.hasE2()) {
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Expand);
@@ -77,6 +103,44 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i32, Expand);
}
+ if (!Subtarget.has3r2E3r3()) {
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ }
+
+ // Float
+
+ ISD::CondCode FPCCToExtend[] = {
+ ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
+ ISD::SETUGE, ISD::SETULT, ISD::SETULE,
+ };
+
+ ISD::NodeType FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
+ ISD::FPOW, ISD::FREM, ISD::FCOPYSIGN};
+
+ if (STI.useHardFloat()) {
+
+ MVT AllVTy[] = {MVT::f32, MVT::f64};
+
+ for (auto VT : AllVTy) {
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::BR_CC, VT, Expand);
+
+ for (auto CC : FPCCToExtend)
+ setCondCodeAction(CC, VT, Expand);
+ for (auto Op : FPOpToExpand)
+ setOperationAction(Op, VT, Expand);
+ }
+
+ if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat()) {
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ }
+ if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+ }
+
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
@@ -92,6 +156,30 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
setSchedulingPreference(Sched::Source);
}
+SDValue CSKYTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("unimplemented op");
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
+ case ISD::BlockAddress:
+ return LowerBlockAddress(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ case ISD::FRAMEADDR:
+ return LowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR:
+ return LowerRETURNADDR(Op, DAG);
+ }
+}
+
EVT CSKYTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &Context, EVT VT) const {
if (!VT.isVector())
@@ -145,6 +233,14 @@ static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget,
case MVT::i32:
RC = &CSKY::GPRRegClass;
break;
+ case MVT::f32:
+ RC = Subtarget.hasFPUv2SingleFloat() ? &CSKY::sFPR32RegClass
+ : &CSKY::FPR32RegClass;
+ break;
+ case MVT::f64:
+ RC = Subtarget.hasFPUv2DoubleFloat() ? &CSKY::sFPR64RegClass
+ : &CSKY::FPR64RegClass;
+ break;
}
Register VReg = RegInfo.createVirtualRegister(RC);
@@ -181,6 +277,44 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
return Val;
}
+static SDValue unpack64(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA,
+ const SDLoc &DL) {
+ assert(VA.getLocVT() == MVT::i32 &&
+ (VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::i64) &&
+ "Unexpected VA");
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ if (VA.isMemLoc()) {
+ // f64/i64 is passed on the stack.
+ int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ return DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ }
+
+ assert(VA.isRegLoc() && "Expected register VA assignment");
+
+ Register LoVReg = RegInfo.createVirtualRegister(&CSKY::GPRRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
+ SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
+ SDValue Hi;
+ if (VA.getLocReg() == CSKY::R3) {
+ // Second half of f64/i64 is passed on the stack.
+ int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ } else {
+ // Second half of f64/i64 is passed in another GPR.
+ Register HiVReg = RegInfo.createVirtualRegister(&CSKY::GPRRegClass);
+ RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
+ Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
+ }
+ return DAG.getNode(CSKYISD::BITCAST_FROM_LOHI, DL, VA.getValVT(), Lo, Hi);
+}
+
// Transform physical registers into virtual registers.
SDValue CSKYTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -210,7 +344,11 @@ SDValue CSKYTargetLowering::LowerFormalArguments(
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue;
- if (VA.isRegLoc())
+ bool IsF64OnCSKY = VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
+
+ if (IsF64OnCSKY)
+ ArgValue = unpack64(DAG, Chain, VA, DL);
+ else if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(Subtarget, DAG, Chain, VA, DL);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -354,6 +492,255 @@ CSKYTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(CSKYISD::RET, DL, MVT::Other, RetOps);
}
+// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
+// and output parameter nodes.
+SDValue CSKYTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &DL = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT XLenVT = MVT::i32;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Analyze the operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ ArgCCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, IsVarArg));
+
+ // Check if it's really possible to do a tail call.
+ if (IsTailCall)
+    IsTailCall = false; // TODO: Support tail-call optimization.
+
+ if (IsTailCall)
+ ++NumTailCalls;
+ else if (CLI.CB && CLI.CB->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+
+ // Create local copies for byval args
+ SmallVector<SDValue, 8> ByValArgs;
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ SDValue Arg = OutVals[i];
+ unsigned Size = Flags.getByValSize();
+ Align Alignment = Flags.getNonZeroByValAlign();
+
+ int FI =
+ MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
+
+ Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
+ /*IsVolatile=*/false,
+ /*AlwaysInline=*/false, IsTailCall,
+ MachinePointerInfo(), MachinePointerInfo());
+ ByValArgs.push_back(FIPtr);
+ }
+
+ if (!IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
+
+ // Copy argument values to their designated locations.
+ SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue ArgValue = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ bool IsF64OnCSKY = VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
+
+ if (IsF64OnCSKY && VA.isRegLoc()) {
+ SDValue Split64 =
+ DAG.getNode(CSKYISD::BITCAST_TO_LOHI, DL,
+ DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
+ SDValue Lo = Split64.getValue(0);
+ SDValue Hi = Split64.getValue(1);
+
+ Register RegLo = VA.getLocReg();
+ RegsToPass.push_back(std::make_pair(RegLo, Lo));
+
+ if (RegLo == CSKY::R3) {
+ // Second half of f64/i64 is passed on the stack.
+ // Work out the address of the stack slot.
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, DL, CSKY::R14, PtrVT);
+ // Emit the store.
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
+ } else {
+ // Second half of f64/i64 is passed in another GPR.
+ assert(RegLo < CSKY::R31 && "Invalid register pair");
+ Register RegHigh = RegLo + 1;
+ RegsToPass.push_back(std::make_pair(RegHigh, Hi));
+ }
+ continue;
+ }
+
+ ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
+
+ // Use local copy if it is a byval arg.
+ if (Flags.isByVal())
+ ArgValue = ByValArgs[j++];
+
+ if (VA.isRegLoc()) {
+ // Queue up the argument copies and emit them at the end.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+ } else {
+ assert(VA.isMemLoc() && "Argument not register or memory");
+ assert(!IsTailCall && "Tail call not allowed if stack is used "
+ "for passing parameters");
+
+ // Work out the address of the stack slot.
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, DL, CSKY::R14, PtrVT);
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
+
+ // Emit the store.
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
+ }
+ }
+
+ // Join the stores, which are independent of one another.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+ SDValue Glue;
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ for (auto &Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
+ Glue = Chain.getValue(1);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ bool IsRegCall = false;
+
+ Ops.push_back(Chain);
+
+ if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = S->getGlobal();
+ bool IsLocal =
+ getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
+
+ if (isPositionIndependent() || !Subtarget.has2E3()) {
+ IsRegCall = true;
+ Ops.push_back(getAddr<GlobalAddressSDNode, true>(S, DAG, IsLocal));
+ } else {
+ Ops.push_back(getTargetNode(cast<GlobalAddressSDNode>(Callee), DL, Ty,
+ DAG, CSKYII::MO_None));
+ Ops.push_back(getTargetConstantPoolValue(
+ cast<GlobalAddressSDNode>(Callee), Ty, DAG, CSKYII::MO_None));
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(
+ *MF.getFunction().getParent(), nullptr);
+
+ if (isPositionIndependent() || !Subtarget.has2E3()) {
+ IsRegCall = true;
+ Ops.push_back(getAddr<ExternalSymbolSDNode, true>(S, DAG, IsLocal));
+ } else {
+ Ops.push_back(getTargetNode(cast<ExternalSymbolSDNode>(Callee), DL, Ty,
+ DAG, CSKYII::MO_None));
+ Ops.push_back(getTargetConstantPoolValue(
+ cast<ExternalSymbolSDNode>(Callee), Ty, DAG, CSKYII::MO_None));
+ }
+ } else {
+ IsRegCall = true;
+ Ops.push_back(Callee);
+ }
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (auto &Reg : RegsToPass)
+ Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+
+ if (!IsTailCall) {
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
+
+ // Glue the call to the argument copies, if any.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ if (IsTailCall) {
+ MF.getFrameInfo().setHasTailCall();
+ return DAG.getNode(IsRegCall ? CSKYISD::TAILReg : CSKYISD::TAIL, DL,
+ NodeTys, Ops);
+ }
+
+ Chain = DAG.getNode(IsRegCall ? CSKYISD::CALLReg : CSKYISD::CALL, DL, NodeTys,
+ Ops);
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
+ Glue = Chain.getValue(1);
+
+ // Mark the end of the call, which is glued to the call itself.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true),
+ DAG.getConstant(0, DL, PtrVT, true), Glue, DL);
+ Glue = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> CSKYLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, CSKYLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, IsVarArg));
+
+ // Copy all of the result registers out of their specified physreg.
+ for (auto &VA : CSKYLocs) {
+ // Copy the value out
+ SDValue RetValue =
+ DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
+ // Glue the RetValue to the end of the call sequence
+ Chain = RetValue.getValue(1);
+ Glue = RetValue.getValue(2);
+
+ bool IsF64OnCSKY = VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
+
+ if (IsF64OnCSKY) {
+ assert(VA.getLocReg() == GPRArgRegs[0] && "Unexpected reg assignment");
+ SDValue RetValue2 =
+ DAG.getCopyFromReg(Chain, DL, GPRArgRegs[1], MVT::i32, Glue);
+ Chain = RetValue2.getValue(1);
+ Glue = RetValue2.getValue(2);
+ RetValue = DAG.getNode(CSKYISD::BITCAST_FROM_LOHI, DL, VA.getValVT(),
+ RetValue, RetValue2);
+ }
+
+ RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
+
+ InVals.push_back(RetValue);
+ }
+
+ return Chain;
+}
+
CCAssignFn *CSKYTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool IsVarArg) const {
if (IsVarArg || !Subtarget.useHardFloatABI())
@@ -370,6 +757,165 @@ CCAssignFn *CSKYTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return CC_CSKY_ABIV2_FP;
}
+static CSKYCP::CSKYCPModifier getModifier(unsigned Flags) {
+
+ if (Flags == CSKYII::MO_ADDR32)
+ return CSKYCP::ADDR;
+ else if (Flags == CSKYII::MO_GOT32)
+ return CSKYCP::GOT;
+ else if (Flags == CSKYII::MO_GOTOFF)
+ return CSKYCP::GOTOFF;
+ else if (Flags == CSKYII::MO_PLT32)
+ return CSKYCP::PLT;
+ else if (Flags == CSKYII::MO_None)
+ return CSKYCP::NO_MOD;
+ else
+ assert(0 && "unknown CSKYII Modifier");
+ return CSKYCP::NO_MOD;
+}
+
+SDValue CSKYTargetLowering::getTargetConstantPoolValue(GlobalAddressSDNode *N,
+ EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flags) const {
+ CSKYConstantPoolValue *CPV = CSKYConstantPoolConstant::Create(
+ N->getGlobal(), CSKYCP::CPValue, 0, getModifier(Flags), false);
+
+ return DAG.getTargetConstantPool(CPV, Ty);
+}
+
+static MachineBasicBlock *
+emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) {
+
+ const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // To "insert" a SELECT instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // bt32 c, sinkMBB
+ // fallthrough --> copyMBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copyMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copyMBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copyMBB);
+ BB->addSuccessor(sinkMBB);
+
+ // bt32 condition, sinkMBB
+ BuildMI(BB, DL, TII.get(Opcode))
+ .addReg(MI.getOperand(1).getReg())
+ .addMBB(sinkMBB);
+
+ // copyMBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copyMBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copyMBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL, TII.get(CSKY::PHI), MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(thisMBB)
+ .addReg(MI.getOperand(3).getReg())
+ .addMBB(copyMBB);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+CSKYTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
+ case CSKY::ISEL32:
+ return emitSelectPseudo(MI, BB, CSKY::BT32);
+ case CSKY::ISEL16:
+ return emitSelectPseudo(MI, BB, CSKY::BT16);
+ }
+}
+
+SDValue CSKYTargetLowering::getTargetConstantPoolValue(ExternalSymbolSDNode *N,
+ EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flags) const {
+ CSKYConstantPoolValue *CPV =
+ CSKYConstantPoolSymbol::Create(Type::getInt32Ty(*DAG.getContext()),
+ N->getSymbol(), 0, getModifier(Flags));
+
+ return DAG.getTargetConstantPool(CPV, Ty);
+}
+
+SDValue CSKYTargetLowering::getTargetConstantPoolValue(JumpTableSDNode *N,
+ EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flags) const {
+ CSKYConstantPoolValue *CPV =
+ CSKYConstantPoolJT::Create(Type::getInt32Ty(*DAG.getContext()),
+ N->getIndex(), 0, getModifier(Flags));
+ return DAG.getTargetConstantPool(CPV, Ty);
+}
+
+SDValue CSKYTargetLowering::getTargetConstantPoolValue(BlockAddressSDNode *N,
+ EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flags) const {
+ CSKYConstantPoolValue *CPV = CSKYConstantPoolConstant::Create(
+ N->getBlockAddress(), CSKYCP::CPBlockAddress, 0, getModifier(Flags),
+ false);
+ return DAG.getTargetConstantPool(CPV, Ty);
+}
+
+SDValue CSKYTargetLowering::getTargetNode(GlobalAddressSDNode *N, SDLoc DL,
+ EVT Ty, SelectionDAG &DAG,
+ unsigned Flags) const {
+ return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
+}
+
+SDValue CSKYTargetLowering::getTargetNode(ExternalSymbolSDNode *N, SDLoc DL,
+ EVT Ty, SelectionDAG &DAG,
+ unsigned Flags) const {
+ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flags);
+}
+
+SDValue CSKYTargetLowering::getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flags) const {
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
+}
+
+SDValue CSKYTargetLowering::getTargetNode(BlockAddressSDNode *N, SDLoc DL,
+ EVT Ty, SelectionDAG &DAG,
+ unsigned Flags) const {
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
+ Flags);
+}
+
const char *CSKYTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default:
@@ -380,7 +926,243 @@ const char *CSKYTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "CSKYISD::NIR";
case CSKYISD::RET:
return "CSKYISD::RET";
+ case CSKYISD::CALL:
+ return "CSKYISD::CALL";
+ case CSKYISD::CALLReg:
+ return "CSKYISD::CALLReg";
+ case CSKYISD::TAIL:
+ return "CSKYISD::TAIL";
+ case CSKYISD::TAILReg:
+ return "CSKYISD::TAILReg";
+ case CSKYISD::LOAD_ADDR:
+ return "CSKYISD::LOAD_ADDR";
case CSKYISD::BITCAST_TO_LOHI:
return "CSKYISD::BITCAST_TO_LOHI";
+ case CSKYISD::BITCAST_FROM_LOHI:
+ return "CSKYISD::BITCAST_FROM_LOHI";
}
}
+
+SDValue CSKYTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT Ty = Op.getValueType();
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+ int64_t Offset = N->getOffset();
+
+ const GlobalValue *GV = N->getGlobal();
+ bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
+ SDValue Addr = getAddr<GlobalAddressSDNode, false>(N, DAG, IsLocal);
+
+ // In order to maximise the opportunity for common subexpression elimination,
+ // emit a separate ADD node for the global address offset instead of folding
+ // it in the global address node. Later peephole optimisations may choose to
+ // fold it back in when profitable.
+ if (Offset != 0)
+ return DAG.getNode(ISD::ADD, DL, Ty, Addr,
+ DAG.getConstant(Offset, DL, MVT::i32));
+ return Addr;
+}
+
+SDValue CSKYTargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ ExternalSymbolSDNode *N = cast<ExternalSymbolSDNode>(Op);
+
+ return getAddr(N, DAG, false);
+}
+
+SDValue CSKYTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+
+ return getAddr<JumpTableSDNode, false>(N, DAG);
+}
+
+SDValue CSKYTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+
+ return getAddr(N, DAG);
+}
+
+SDValue CSKYTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ CSKYMachineFunctionInfo *FuncInfo = MF.getInfo<CSKYMachineFunctionInfo>();
+
+ SDLoc DL(Op);
+ SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy(MF.getDataLayout()));
+
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
+ MachinePointerInfo(SV));
+}
+
+SDValue CSKYTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ const CSKYRegisterInfo &RI = *Subtarget.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ Register FrameReg = RI.getFrameRegister(MF);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo());
+ return FrameAddr;
+}
+
+SDValue CSKYTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ const CSKYRegisterInfo &RI = *Subtarget.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo());
+ }
+ // Return the value of the return address register, marking it an implicit
+ // live-in.
+ unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+Register CSKYTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ return CSKY::R0;
+}
+
+Register CSKYTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ return CSKY::R1;
+}
+
+SDValue CSKYTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT Ty = Op.getValueType();
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+ int64_t Offset = N->getOffset();
+ MVT XLenVT = MVT::i32;
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
+ SDValue Addr;
+ switch (Model) {
+ case TLSModel::LocalExec:
+ Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
+ break;
+ case TLSModel::InitialExec:
+ Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
+ break;
+ case TLSModel::LocalDynamic:
+ case TLSModel::GeneralDynamic:
+ Addr = getDynamicTLSAddr(N, DAG);
+ break;
+ }
+
+ // In order to maximise the opportunity for common subexpression elimination,
+ // emit a separate ADD node for the global address offset instead of folding
+ // it in the global address node. Later peephole optimisations may choose to
+ // fold it back in when profitable.
+ if (Offset != 0)
+ return DAG.getNode(ISD::ADD, DL, Ty, Addr,
+ DAG.getConstant(Offset, DL, XLenVT));
+ return Addr;
+}
+
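+// Lower a local-exec or initial-exec TLS access: load the symbol's TLS offset
+// from a constant-pool entry (indirected through the GOT for initial-exec)
+// and add it to the thread pointer in R31.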
+SDValue CSKYTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
+ SelectionDAG &DAG,
+ bool UseGOT) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+
+ unsigned CSKYPCLabelIndex = CFI->createPICLabelUId();
+
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+
+ CSKYCP::CSKYCPModifier Flag = UseGOT ? CSKYCP::TLSIE : CSKYCP::TLSLE;
+  bool AddCurrentAddr = UseGOT;
+  unsigned char PCAdjust = UseGOT ? 4 : 0;
+
+  CSKYConstantPoolValue *CPV = CSKYConstantPoolConstant::Create(
+      N->getGlobal(), CSKYCP::CPValue, PCAdjust, Flag, AddCurrentAddr,
+      CSKYPCLabelIndex);
+ SDValue CAddr = DAG.getTargetConstantPool(CPV, Ty);
+
+ SDValue Load;
+ if (UseGOT) {
+ SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32);
+ auto *LRWGRS = DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty},
+ {CAddr, PICLabel});
+ auto LRWADDGRS =
+ DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1));
+ Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), LRWADDGRS,
+ MachinePointerInfo(N->getGlobal()));
+ } else {
+ Load = SDValue(DAG.getMachineNode(CSKY::LRW32, DL, Ty, CAddr), 0);
+ }
+
+ // Add the thread pointer.
+ SDValue TPReg = DAG.getRegister(CSKY::R31, MVT::i32);
+ return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
+}
+
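+// Lower a general-/local-dynamic TLS access: form the argument from a
+// PC-relative constant-pool entry and emit a call to __tls_get_addr to obtain
+// the symbol's address.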
+SDValue CSKYTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+
+ unsigned CSKYPCLabelIndex = CFI->createPICLabelUId();
+
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
+
+ CSKYConstantPoolValue *CPV =
+ CSKYConstantPoolConstant::Create(N->getGlobal(), CSKYCP::CPValue, 4,
+ CSKYCP::TLSGD, true, CSKYPCLabelIndex);
+ SDValue Addr = DAG.getTargetConstantPool(CPV, Ty);
+ SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32);
+
+ auto *LRWGRS =
+ DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty}, {Addr, PICLabel});
+
+ auto Load =
+ DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1));
+
+ // Prepare argument list to generate call.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Load;
+ Entry.Ty = CallTy;
+ Args.push_back(Entry);
+
+ // Setup call to __tls_get_addr.
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, CallTy,
+ DAG.getExternalSymbol("__tls_get_addr", Ty),
+ std::move(Args));
+ SDValue V = LowerCallTo(CLI).first;
+
+ return V;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
index 7557c11f50a8..e1744d5ce220 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.h
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -27,7 +27,15 @@ enum NodeType : unsigned {
NIE,
NIR,
RET,
- BITCAST_TO_LOHI
+ CALL,
+ CALLReg,
+ TAIL,
+ TAILReg,
+ LOAD_ADDR,
+ // i32, i32 <-- f64
+ BITCAST_TO_LOHI,
+  // f64 <-- i32, i32
+ BITCAST_FROM_LOHI,
};
}
@@ -38,6 +46,8 @@ public:
explicit CSKYTargetLowering(const TargetMachine &TM,
const CSKYSubtarget &STI);
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -58,8 +68,96 @@ private:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
const char *getTargetNodeName(unsigned Opcode) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ Register
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ Register
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
+ bool isSelectSupported(SelectSupportKind Kind) const override {
+ // CSKY does not support scalar condition selects on vectors.
+ return (Kind != ScalarCondVectorVal);
+ }
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const override;
+
+ SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) const;
+
+ SDValue getTargetNode(ExternalSymbolSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) const;
+
+ SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, SelectionDAG &DAG,
+ unsigned Flags) const;
+
+ SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) const;
+
+ SDValue getTargetConstantPoolValue(GlobalAddressSDNode *N, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) const;
+
+ SDValue getTargetConstantPoolValue(ExternalSymbolSDNode *N, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) const;
+
+ SDValue getTargetConstantPoolValue(JumpTableSDNode *N, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) const;
+
+ SDValue getTargetConstantPoolValue(BlockAddressSDNode *N, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) const;
+
+ template <class NodeTy, bool IsCall = false>
+ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const {
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+
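+    // Select the relocation flavour: GOT-offset for PIC-local symbols, PLT
+    // for calls and GOT for other globals; non-PIC code needs no modifier.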
+ unsigned Flag = CSKYII::MO_None;
+ bool IsPIC = isPositionIndependent();
+
+ if (IsPIC)
+ Flag = IsLocal ? CSKYII::MO_GOTOFF
+ : IsCall ? CSKYII::MO_PLT32
+ : CSKYII::MO_GOT32;
+
+ SDValue TCPV = getTargetConstantPoolValue(N, Ty, DAG, Flag);
+ SDValue TV = getTargetNode(N, DL, Ty, DAG, Flag);
+ SDValue Addr = DAG.getNode(CSKYISD::LOAD_ADDR, DL, Ty, {TV, TCPV});
+
+ if (!IsPIC)
+ return Addr;
+
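+    // In PIC mode the pool entry is GOT-relative, so add the GOT base; for
+    // non-local symbols the final address is then loaded from the GOT slot.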
+ SDValue Result =
+ DAG.getNode(ISD::ADD, DL, Ty, {DAG.getGLOBAL_OFFSET_TABLE(Ty), Addr});
+ if (IsLocal)
+ return Result;
+
+ return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ }
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+ bool UseGOT) const;
+ SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
+
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
};
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td b/llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td
new file mode 100644
index 000000000000..446670a4d0a9
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td
@@ -0,0 +1,274 @@
+//===- CSKYInstrFormatsF1.td - CSKY Float1.0 Instr Format --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// CSKY Instruction Format Float1.0 Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class CSKYFP1Inst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3d, outs, ins, asmstr, pattern>, Requires<[HasFPUv2_SF]> {
+}
+
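+// Common FPUv2 three-operand format: the 4-bit FPU register numbers are
+// zero-extended into 5-bit fields of the encoding.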
+class F_XYZ_BASE<bits<5> datatype, bits<6> sop, dag outs, dag ins, string opcodestr, list<dag> pattern>
+ : CSKYFP1Inst<outs, ins, opcodestr, pattern> {
+ bits<4> vrx;
+ bits<4> vry;
+ bits<4> vrz;
+ let Inst{25 - 21} = {0, vry};
+ let Inst{20 - 16} = {0, vrx};
+ let Inst{15 - 11} = datatype;
+ let Inst{10 - 5} = sop;
+ let Inst{4 - 0} = {0, vrz};
+}
+
+class F_XZ_GF<bits<5> datatype, bits<6> sop, dag outs, dag ins, string opcodestr, list<dag> pattern>
+ : CSKYFP1Inst<outs, ins, opcodestr, pattern> {
+ bits<4> vrx;
+ bits<5> rz;
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = {0, vrx};
+ let Inst{15 - 11} = datatype;
+ let Inst{10 - 5} = sop;
+ let Inst{4 - 0} = {rz};
+}
+
+class F_XZ_FG<bits<5> datatype, bits<6> sop, dag outs, dag ins, string opcodestr, list<dag> pattern>
+ : CSKYFP1Inst<outs, ins, opcodestr, pattern> {
+ bits<5> rx;
+ bits<4> vrz;
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = {rx};
+ let Inst{15 - 11} = datatype;
+ let Inst{10 - 5} = sop;
+ let Inst{4 - 0} = {0, vrz};
+}
+
+class F_XZ_TRANS_FROM<bits<6> sop, string op, RegisterOperand regtype1, RegisterOperand regtype2>
+ : F_XZ_GF<3, sop, (outs regtype1:$rz), (ins regtype2:$vrx), !strconcat(op, "\t$rz, $vrx"),
+ []>;
+
+class F_XZ_TRANS_TO<bits<6> sop, string op, RegisterOperand regtype1, RegisterOperand regtype2>
+ : F_XZ_FG<3, sop, (outs regtype1:$vrz), (ins regtype2:$rx), !strconcat(op, "\t$vrz, $rx"),
+ []>;
+
+let vry = 0 in {
+class F_XZ<bits<5> datatype, bits<6> sop, string op, string op_su, PatFrag opnode, RegisterOperand regtype>
+ : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrx), !strconcat(op#op_su, "\t$vrz, $vrx"),
+ [(set regtype:$vrz, (opnode regtype:$vrx))]>;
+
+class F_MOV<bits<5> datatype, bits<6> sop, string op, string op_su, RegisterOperand regtype>
+ : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrx), !strconcat(op#op_su, "\t$vrz, $vrx"),
+ []>;
+
+class F_XZ_TRANS<bits<6> sop, string op, RegisterOperand regtype1, RegisterOperand regtype2>
+ : F_XYZ_BASE<3, sop, (outs regtype1:$vrz), (ins regtype2:$vrx), !strconcat(op, "\t$vrz, $vrx"),
+ []>;
+
+class F_XZ_TRANS_DS<bits<6> sop, string op, PatFrag opnode>
+ : F_XYZ_BASE<3, sop, (outs sFPR32Op:$vrz), (ins sFPR64Op:$vrx), !strconcat(op, "\t$vrz, $vrx"),
+ [(set sFPR32Op:$vrz, (opnode sFPR64Op:$vrx))]>;
+
+class F_XZ_TRANS_SD<bits<6> sop, string op, PatFrag opnode>
+ : F_XYZ_BASE<3, sop, (outs sFPR64Op:$vrz), (ins sFPR32Op:$vrx), !strconcat(op, "\t$vrz, $vrx"),
+ [(set sFPR64Op:$vrz, (opnode sFPR32Op:$vrx))]>;
+}
+
+multiclass FT_MOV<bits<6> sop, string op> {
+ def _S : F_MOV<0, sop, op, "s", sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_MOV<1, sop, op, "d", sFPR64Op>;
+}
+
+multiclass FT_XZ<bits<6> sop, string op, PatFrag opnode> {
+ def _S : F_XZ<0, sop, op, "s", opnode, sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_XZ<1, sop, op, "d", opnode, sFPR64Op>;
+}
+
+let vrz = 0, isCompare = 1 in {
+class F_CMPXY<bits<5> datatype, bits<6> sop, string op, string op_su, RegisterOperand regtype>
+ : F_XYZ_BASE<datatype, sop, (outs CARRY:$ca), (ins regtype:$vrx, regtype:$vry), !strconcat(op#op_su, "\t$vrx, $vry"),
+ []>;
+
+let vry = 0 in {
+class F_CMPZX<bits<5> datatype, bits<6> sop, string op, string op_su, RegisterOperand regtype>
+ : F_XYZ_BASE<datatype, sop, (outs CARRY:$ca), (ins regtype:$vrx), !strconcat(op#op_su, "\t$vrx"),
+ []>;
+}
+}
+
+class F_XYZ<bits<5> datatype, bits<6> sop, string op, string op_su, PatFrag opnode, RegisterOperand regtype>
+ : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrx, regtype:$vry),
+ !strconcat(op#op_su, "\t$vrz, $vrx, $vry"),
+ [(set regtype:$vrz, (opnode regtype:$vrx, regtype:$vry))]>;
+
+multiclass FT_XYZ<bits<6> sop, string op, PatFrag opnode> {
+ def _S : F_XYZ<0, sop, op, "s", opnode, sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_XYZ<1, sop, op, "d", opnode, sFPR64Op>;
+}
+
+let Constraints = "$vrt = $vrz" in {
+class F_ACCUM_XYZ<bits<5> datatype, bits<6> sop, string op, string op_su, PatFrag opnode, RegisterOperand regtype>
+ : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrt, regtype:$vrx, regtype:$vry),
+ !strconcat(op#op_su, "\t$vrz, $vrx, $vry"),
+ [(set regtype:$vrz, (opnode regtype:$vrt, regtype:$vrx, regtype:$vry))]>;
+}
+
+multiclass FT_ACCUM_XYZ<bits<6> sop, string op, PatFrag opnode> {
+ def _S : F_ACCUM_XYZ<0, sop, op, "s", opnode, sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_ACCUM_XYZ<1, sop, op, "d", opnode, sFPR64Op>;
+}
+
+multiclass FT_CMPXY<bits<6> sop, string op> {
+ def _S : F_CMPXY<0, sop, op, "s", sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_CMPXY<1, sop, op, "d", sFPR64Op>;
+}
+
+
+multiclass FT_CMPZX<bits<6> sop, string op> {
+ def _S : F_CMPZX<0, sop, op, "s", sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_CMPZX<1, sop, op, "d", sFPR64Op>;
+}
+
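+// Load/store format with an 8-bit immediate operand split into imm4h
+// (Inst{24-21}) and imm4l (Inst{7-4}).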
+class F_I8_XY_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+ : CSKY32Inst<AddrMode32SDF, 0x3d, outs, ins, opcodestr, pattern> {
+ bits<5> rx;
+ bits<4> vrz;
+ bits<8> imm8;
+ let Inst{25} = 0;
+ let Inst{24 - 21} = imm8{7 - 4}; //imm4h
+ let Inst{20 - 16} = rx; //rx
+ let Inst{15 - 9} = sop;
+ let Inst{8} = sop_su;
+ let Inst{7 - 4} = imm8{3 - 0}; // imm4l
+ let Inst{3 - 0} = vrz;
+}
+
+class F_I4_XY_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+ : CSKY32Inst<AddrMode32SDF, 0x3d, outs, ins, opcodestr, pattern> {
+ bits<10> regs;
+ bits<5> rx;
+
+ let Inst{25} = 0;
+ let Inst{24 - 21} = regs{3-0}; //imm4
+ let Inst{20 - 16} = rx; //rx
+ let Inst{15 - 9} = sop;
+ let Inst{8} = sop_su;
+ let Inst{7 - 4} = 0;
+ let Inst{3 - 0} = regs{8-5};
+}
+
+class F_I8_Z_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3d, outs, ins, opcodestr, pattern> {
+ bits<4> vrz;
+ bits<8> imm8;
+ let Inst{25} = 0;
+ let Inst{24 - 21} = imm8{7 - 4}; //imm4h
+ let Inst{20 - 16} = 0; //rx
+ let Inst{15 - 9} = sop;
+ let Inst{8} = sop_su;
+ let Inst{7 - 4} = imm8{3 - 0}; // imm4l
+ let Inst{3 - 0} = vrz;
+}
+
+class F_XYZ_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x3d, outs, ins, opcodestr, pattern> {
+ bits<5> rx;
+ bits<5> ry;
+ bits<4> vrz;
+ bits<2> imm;
+
+ let Inst{25 - 21} = ry; // ry;
+ let Inst{20 - 16} = rx; // rx;
+ let Inst{15 - 9} = sop;
+ let Inst{8} = sop_su;
+ let Inst{7} = 0;
+ let Inst{6,5} = imm; // shift;
+ let Inst{4} = 0;
+ let Inst{3 - 0} = vrz;
+}
+
+class F_XYAI_LD<bits<7> sop, bits<1> sop_su, string op, string op_su,
+ RegisterOperand regtype, Operand operand>
+ : F_I8_XY_MEM<sop, sop_su, (outs regtype:$vrz), (ins GPR:$rx, operand:$imm8),
+ !strconcat(op#op_su, "\t$vrz, ($rx, ${imm8})"), []>;
+
+class F_XYAR_LD<bits<7> sop, bits<1> sop_su, string op, string op_su,
+ RegisterOperand regtype>
+ : F_XYZ_MEM<sop, sop_su, (outs regtype:$vrz), (ins GPR:$rx, GPR:$ry, uimm2:$imm),
+ op#op_su#"\t$vrz, ($rx, $ry << ${imm})", []>;
+
+class F_XYAI_ST<bits<7> sop, bits<1> sop_su, string op, string op_su,
+ RegisterOperand regtype, Operand operand>
+ : F_I8_XY_MEM<sop, sop_su, (outs), (ins regtype:$vrz, GPR:$rx, operand:$imm8),
+ !strconcat(op#op_su, "\t$vrz, ($rx, ${imm8})"), []>;
+
+class F_XYAR_ST<bits<7> sop, bits<1> sop_su, string op, string op_su,
+ RegisterOperand regtype>
+ : F_XYZ_MEM<sop, sop_su, (outs), (ins regtype:$vrz, GPR:$rx, GPR:$ry, uimm2:$imm),
+ op#op_su#"\t$vrz, ($rx, $ry << ${imm})", []>;
+
+def Mem8SL2 : Operand<iPTR>, ComplexPattern<iPTR, 2, "SelectAddrRegImm8", []> {
+ let MIOperandInfo = (ops GPR, i32imm);
+ let PrintMethod = "printAddrModeRegImmOperand";
+ let EncoderMethod = "getAddrModeFloatImm8_sl2OpValue";
+}
+
+def FRRS : Operand<iPTR>, ComplexPattern<iPTR, 3, "SelectAddrRegReg", []> {
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+ let PrintMethod = "printAddrModeRegRegSLOperand";
+ let EncoderMethod = "getAddrModeFloatRegRegSLOpValue";
+}
+
+multiclass FT_XYAI_LD<bits<7> sop, string op> {
+ def _S : F_XYAI_LD<sop, 0, op, "s", sFPR32Op, uimm8_2>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_XYAI_LD<sop, 1, op, "d", sFPR64Op, uimm8_2>;
+}
+
+multiclass FT_XYAR_LD<bits<7> sop, string op> {
+ def _S : F_XYAR_LD<sop, 0, op, "s", sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_XYAR_LD<sop, 1, op, "d", sFPR64Op>;
+}
+
+multiclass FT_XYAI_ST<bits<7> sop, string op> {
+ def _S : F_XYAI_ST<sop, 0, op, "s", sFPR32Op, uimm8_2>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_XYAI_ST<sop, 1, op, "d", sFPR64Op, uimm8_2>;
+}
+
+multiclass FT_XYAR_ST<bits<7> sop, string op> {
+ def _S : F_XYAR_ST<sop, 0, op, "s", sFPR32Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_XYAR_ST<sop, 1, op, "d", sFPR64Op>;
+}
+
+multiclass FT_XYAR_STM<bits<7> sop, string op> {
+ def _S : F_I4_XY_MEM<sop, 0, (outs),
+ (ins GPR:$rx, regseq_f1:$regs, variable_ops),
+ !strconcat(op#"s", "\t$regs, (${rx})"), []>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_I4_XY_MEM<sop, 1, (outs),
+ (ins GPR:$rx, regseq_d1:$regs, variable_ops),
+ !strconcat(op#"d", "\t$regs, (${rx})"), []>;
+}
+
+multiclass FT_XYAR_LDM<bits<7> sop, string op> {
+ def _S : F_I4_XY_MEM<sop, 0, (outs),
+ (ins GPR:$rx, regseq_f1:$regs, variable_ops),
+ !strconcat(op#"s", "\t$regs, (${rx})"), []>;
+ let Predicates = [HasFPUv2_DF] in
+ def _D : F_I4_XY_MEM<sop, 1, (outs),
+ (ins GPR:$rx, regseq_d1:$regs, variable_ops),
+ !strconcat(op#"d", "\t$regs, (${rx})"), []>;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td b/llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td
new file mode 100644
index 000000000000..641ad623f140
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td
@@ -0,0 +1,208 @@
+//===- CSKYInstrFormatsF2.td - CSKY Float2.0 Instr Format --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// CSKY Instruction Format Float2.0 Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class CSKYInstF2<AddrMode am, dag outs, dag ins, string opcodestr,
+ list<dag> pattern>
+ : CSKY32Inst<am, 0x3d, outs, ins, opcodestr, pattern> {
+ let Predicates = [HasFPUv3_SF];
+ let DecoderNamespace = "FPUV3";
+}
+
+class F2_XYZ<bits<5> datatype, bits<6> sop, string opcodestr, dag outs, dag ins,
+ list<dag> pattern>
+ : CSKYInstF2<AddrModeNone, outs, ins, opcodestr, pattern> {
+ bits<5> vry;
+ bits<5> vrx;
+ bits<5> vrz;
+
+ let Inst{25-21} = vry;
+ let Inst{20-16} = vrx;
+ let Inst{15-11} = datatype;
+ let Inst{10-5} = sop;
+ let Inst{4-0} = vrz;
+}
+
+multiclass F2_XYZ_T<bits<6> sop, string op, PatFrag opnode> {
+ def _S : F2_XYZ<0b00000, sop, op#".32"#"\t$vrz, $vrx, $vry",
+ (outs FPR32Op:$vrz), (ins FPR32Op:$vrx, FPR32Op:$vry),
+ [(set FPR32Op:$vrz, (opnode FPR32Op:$vrx, FPR32Op:$vry))]>;
+ let Predicates = [HasFPUv3_DF] in
+ def _D : F2_XYZ<0b00001, sop, op#".64"#"\t$vrz, $vrx, $vry",
+ (outs FPR64Op:$vrz), (ins FPR64Op:$vrx, FPR64Op:$vry),
+ [(set FPR64Op:$vrz, (opnode FPR64Op:$vrx, FPR64Op:$vry))]>;
+}
+
+let Constraints = "$vrZ = $vrz" in
+multiclass F2_XYZZ_T<bits<6> sop, string op, PatFrag opnode> {
+ def _S : F2_XYZ<0b00000, sop, op#".32"#"\t$vrz, $vrx, $vry",
+ (outs FPR32Op:$vrz), (ins FPR32Op:$vrZ, FPR32Op:$vrx, FPR32Op:$vry),
+ [(set FPR32Op:$vrz, (opnode FPR32Op:$vrx, FPR32Op:$vry, FPR32Op:$vrZ))]>;
+ let Predicates = [HasFPUv3_DF] in
+ def _D : F2_XYZ<0b00001, sop, op#".64"#"\t$vrz, $vrx, $vry",
+ (outs FPR64Op:$vrz), (ins FPR64Op:$vrZ, FPR64Op:$vrx, FPR64Op:$vry),
+ [(set FPR64Op:$vrz, (opnode FPR64Op:$vrx, FPR64Op:$vry, FPR64Op:$vrZ))]>;
+}
+
+let vry = 0 in {
+class F2_XZ<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op, SDNode opnode>
+ : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrz, $vrx"),
+ (outs regtype:$vrz), (ins regtype:$vrx),
+ [(set regtype:$vrz, (opnode regtype:$vrx))]>;
+
+class F2_XZ_SET<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+ : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrz, $vrx"),
+ (outs regtype:$vrz), (ins regtype:$vrx),
+ []>;
+
+class F2_XZ_P<bits<5> datatype, bits<6> sop, string op, list<dag> pattern = [],
+ dag outs, dag ins>
+ : F2_XYZ<datatype, sop, op#"\t$vrz, $vrx", outs, ins, pattern>;
+}
+
+multiclass F2_XZ_RM<bits<5> datatype, bits<4> sop, string op, dag outs, dag ins> {
+ def _RN : F2_XZ_P<datatype, {sop, 0b00}, op#".rn", [], outs, ins>;
+ def _RZ : F2_XZ_P<datatype, {sop, 0b01}, op#".rz", [], outs, ins>;
+ def _RPI : F2_XZ_P<datatype, {sop, 0b10}, op#".rpi", [], outs, ins>;
+ def _RNI : F2_XZ_P<datatype, {sop, 0b11}, op#".rni", [], outs, ins>;
+}
+
+multiclass F2_XZ_T<bits<6> sop, string op, SDNode opnode> {
+ def _S : F2_XZ<0b00000, FPR32Op, sop, op#".32", opnode>;
+ let Predicates = [HasFPUv3_DF] in
+ def _D : F2_XZ<0b00001, FPR64Op, sop, op#".64", opnode>;
+}
+
+multiclass F2_XZ_SET_T<bits<6> sop, string op, string suffix = ""> {
+ def _S : F2_XZ_SET<0b00000, FPR32Op, sop, op#".32"#suffix>;
+ let Predicates = [HasFPUv3_DF] in
+ def _D : F2_XZ_SET<0b00001, FPR64Op, sop, op#".64"#suffix>;
+}
+
+
+let vrz = 0, isCompare = 1 in
+class F2_CXY<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+ : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrx, $vry"),
+ (outs CARRY:$ca), (ins regtype:$vrx, regtype:$vry),
+ []>;
+
+multiclass F2_CXY_T<bits<6> sop, string op> {
+ def _S : F2_CXY<0b00000, FPR32Op, sop, op#".32">;
+ let Predicates = [HasFPUv3_DF] in
+ def _D : F2_CXY<0b00001, FPR64Op, sop, op#".64">;
+}
+
+
+let vrz = 0, vry = 0, isCompare = 1 in
+class F2_CX<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+ : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrx"),
+ (outs CARRY:$ca), (ins regtype:$vrx),
+ []>;
+
+multiclass F2_CX_T<bits<6> sop, string op> {
+ def _S : F2_CX<0b00000, FPR32Op, sop, op#".32">;
+ let Predicates = [HasFPUv3_DF] in
+ def _D : F2_CX<0b00001, FPR64Op, sop, op#".64">;
+}
+
+
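+// FPUv3 load/store format: bit 25 carries vrz{4} so all 32 FPU registers are
+// addressable, and the 8-bit immediate is split across Inst{24-21} and Inst{7-4}.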
+class F2_LDST<bits<2> datatype, bits<1> sop, string op, dag outs, dag ins>
+ : CSKYInstF2<AddrMode32SDF, outs, ins,
+ !strconcat(op, "\t$vrz, ($rx, ${imm8})"), []> {
+ bits<10> imm8;
+ bits<5> rx;
+ bits<5> vrz;
+
+ let Inst{25} = vrz{4};
+ let Inst{24-21} = imm8{7-4};
+ let Inst{20-16} = rx;
+ let Inst{15-11} = 0b00100;
+ let Inst{10} = sop;
+ let Inst{9-8} = datatype;
+ let Inst{7-4} = imm8{3-0};
+ let Inst{3-0} = vrz{3-0};
+}
+
+class F2_LDST_S<bits<1> sop, string op, dag outs, dag ins>
+ : F2_LDST<0b00, sop, op#".32", outs, ins>;
+class F2_LDST_D<bits<1> sop, string op, dag outs, dag ins>
+ : F2_LDST<0b01, sop, op#".64", outs, ins>;
+
+class F2_LDSTM<bits<2> datatype, bits<1> sop, bits<3> sop2, string op, dag outs, dag ins>
+ : CSKYInstF2<AddrMode32SDF, outs, ins,
+ !strconcat(op, "\t$regs, (${rx})"), []> {
+ bits<10> regs;
+ bits<5> rx;
+
+ let Inst{25-21} = regs{4-0};
+ let Inst{20-16} = rx;
+ let Inst{15-11} = 0b00110;
+ let Inst{10} = sop;
+ let Inst{9-8} = datatype;
+ let Inst{7-5} = sop2;
+ let Inst{4-0} = regs{9-5};
+}
+
+class F2_LDSTM_S<bits<1> sop, bits<3> sop2, string op, dag outs, dag ins>
+ : F2_LDSTM<0b00, sop, sop2, op#".32", outs, ins>;
+class F2_LDSTM_D<bits<1> sop, bits<3> sop2, string op, dag outs, dag ins>
+ : F2_LDSTM<0b01, sop, sop2, op#".64", outs, ins>;
+
+
+class F2_LDSTR<bits<2> datatype, bits<1> sop, string op, dag outs, dag ins>
+ : CSKYInstF2<AddrModeNone, outs, ins,
+ op#"\t$rz, ($rx, $ry << ${imm})", []> {
+ bits<5> rx;
+ bits<5> ry;
+ bits<5> rz;
+ bits<2> imm;
+
+ let Inst{25-21} = ry;
+ let Inst{20-16} = rx;
+ let Inst{15-11} = 0b00101;
+ let Inst{10} = sop;
+ let Inst{9-8} = datatype;
+ let Inst{7} = 0;
+ let Inst{6-5} = imm;
+ let Inst{4-0} = rz;
+}
+
+class F2_LDSTR_S<bits<1> sop, string op, dag outs, dag ins>
+ : F2_LDSTR<0b00, sop, op#".32", outs, ins>;
+class F2_LDSTR_D<bits<1> sop, string op, dag outs, dag ins>
+ : F2_LDSTR<0b01, sop, op#".64", outs, ins>;
+
+class F2_CXYZ<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+ : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrz, $vrx, $vry"),
+ (outs regtype:$vrz), (ins CARRY:$ca, regtype:$vrx, regtype:$vry),
+ []>;
+multiclass F2_CXYZ_T<bits<6> sop, string op> {
+ def _S : F2_CXYZ<0b00000, FPR32Op, sop, op#".32">;
+ let Predicates = [HasFPUv3_DF] in
+ def _D : F2_CXYZ<0b00001, FPR64Op, sop, op#".64">;
+}
+
+class F2_LRW<bits<2> datatype, bits<1> sop, string op, dag outs, dag ins>
+ : CSKYInstF2<AddrModeNone, outs, ins,
+ !strconcat(op, "\t$vrz, ${imm8}"), []> {
+ bits<10> imm8;
+ bits<5> rx;
+ bits<5> vrz;
+
+ let Inst{25} = vrz{4};
+ let Inst{24-21} = imm8{7-4};
+ let Inst{20-16} = 0;
+ let Inst{15-11} = 0b00111;
+ let Inst{10} = sop;
+ let Inst{9-8} = datatype;
+ let Inst{7-4} = imm8{3-0};
+ let Inst{3-0} = vrz{3-0};
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index 6fcb136cd99b..c57ccb9d6eea 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYInstrInfo.h"
+#include "CSKYConstantPoolValue.h"
#include "CSKYMachineFunctionInfo.h"
#include "CSKYTargetMachine.h"
#include "llvm/MC/MCContext.h"
@@ -24,6 +25,199 @@ using namespace llvm;
CSKYInstrInfo::CSKYInstrInfo(CSKYSubtarget &STI)
: CSKYGenInstrInfo(CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), STI(STI) {
+ v2sf = STI.hasFPUv2SingleFloat();
+ v2df = STI.hasFPUv2DoubleFloat();
+ v3sf = STI.hasFPUv3SingleFloat();
+ v3df = STI.hasFPUv3DoubleFloat();
+}
+
+static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ // Block ends with fall-through condbranch.
+ assert(LastInst.getDesc().isConditionalBranch() &&
+ "Unknown conditional branch");
+ Target = LastInst.getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode()));
+ Cond.push_back(LastInst.getOperand(0));
+}
+
+bool CSKYInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ TBB = FBB = nullptr;
+ Cond.clear();
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end() || !isUnpredicatedTerminator(*I))
+ return false;
+
+ // Count the number of terminators and find the first unconditional or
+ // indirect branch.
+ MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
+ int NumTerminators = 0;
+ for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
+ J++) {
+ NumTerminators++;
+ if (J->getDesc().isUnconditionalBranch() ||
+ J->getDesc().isIndirectBranch()) {
+ FirstUncondOrIndirectBr = J.getReverse();
+ }
+ }
+
+ // If AllowModify is true, we can erase any terminators after
+  // FirstUncondOrIndirectBr.
+ if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
+ while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
+ std::next(FirstUncondOrIndirectBr)->eraseFromParent();
+ NumTerminators--;
+ }
+ I = FirstUncondOrIndirectBr;
+ }
+
+ // We can't handle blocks that end in an indirect branch.
+ if (I->getDesc().isIndirectBranch())
+ return true;
+
+ // We can't handle blocks with more than 2 terminators.
+ if (NumTerminators > 2)
+ return true;
+
+ // Handle a single unconditional branch.
+ if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
+ TBB = getBranchDestBlock(*I);
+ return false;
+ }
+
+ // Handle a single conditional branch.
+ if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
+ parseCondBranch(*I, TBB, Cond);
+ return false;
+ }
+
+ // Handle a conditional branch followed by an unconditional branch.
+ if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
+ I->getDesc().isUnconditionalBranch()) {
+ parseCondBranch(*std::prev(I), TBB, Cond);
+ FBB = getBranchDestBlock(*I);
+ return false;
+ }
+
+ // Otherwise, we can't handle this.
+ return true;
+}
+
+unsigned CSKYInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ if (BytesRemoved)
+ *BytesRemoved = 0;
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return 0;
+
+ if (!I->getDesc().isUnconditionalBranch() &&
+ !I->getDesc().isConditionalBranch())
+ return 0;
+
+ // Remove the branch.
+ if (BytesRemoved)
+ *BytesRemoved += getInstSizeInBytes(*I);
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin())
+ return 1;
+ --I;
+ if (!I->getDesc().isConditionalBranch())
+ return 1;
+
+ // Remove the branch.
+ if (BytesRemoved)
+ *BytesRemoved += getInstSizeInBytes(*I);
+ I->eraseFromParent();
+ return 2;
+}
+
+MachineBasicBlock *
+CSKYInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
+ assert(MI.getDesc().isBranch() && "Unexpected opcode!");
+ // The branch target is always the last operand.
+ int NumOp = MI.getNumExplicitOperands();
+ assert(MI.getOperand(NumOp - 1).isMBB() && "Expected MBB!");
+ return MI.getOperand(NumOp - 1).getMBB();
+}
+
+unsigned CSKYInstrInfo::insertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
+ if (BytesAdded)
+ *BytesAdded = 0;
+
+ // Shouldn't be a fall through.
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "CSKY branch conditions have two components!");
+
+ // Unconditional branch.
+ if (Cond.empty()) {
+ MachineInstr &MI = *BuildMI(&MBB, DL, get(CSKY::BR32)).addMBB(TBB);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(MI);
+ return 1;
+ }
+
+ // Either a one or two-way conditional branch.
+ unsigned Opc = Cond[0].getImm();
+ MachineInstr &CondMI = *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).addMBB(TBB);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(CondMI);
+
+ // One-way conditional branch.
+ if (!FBB)
+ return 1;
+
+ // Two-way conditional branch.
+ MachineInstr &MI = *BuildMI(&MBB, DL, get(CSKY::BR32)).addMBB(FBB);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(MI);
+ return 2;
+}
+
+static unsigned getOppositeBranchOpc(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown conditional branch!");
+ case CSKY::BT32:
+ return CSKY::BF32;
+ case CSKY::BT16:
+ return CSKY::BF16;
+ case CSKY::BF32:
+ return CSKY::BT32;
+ case CSKY::BF16:
+ return CSKY::BT16;
+ case CSKY::BHZ32:
+ return CSKY::BLSZ32;
+ case CSKY::BHSZ32:
+ return CSKY::BLZ32;
+ case CSKY::BLZ32:
+ return CSKY::BHSZ32;
+ case CSKY::BLSZ32:
+ return CSKY::BHZ32;
+ case CSKY::BNEZ32:
+ return CSKY::BEZ32;
+ case CSKY::BEZ32:
+ return CSKY::BNEZ32;
+ }
+}
+
+bool CSKYInstrInfo::reverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ assert((Cond.size() == 2) && "Invalid branch condition!");
+ Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm()));
+ return false;
}
Register CSKYInstrInfo::movImm(MachineBasicBlock &MBB,
@@ -147,6 +341,10 @@ unsigned CSKYInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case CSKY::LD32H:
case CSKY::LD32HS:
case CSKY::LD32W:
+ case CSKY::FLD_S:
+ case CSKY::FLD_D:
+ case CSKY::f2FLD_S:
+ case CSKY::f2FLD_D:
case CSKY::RESTORE_CARRY:
break;
}
@@ -171,6 +369,10 @@ unsigned CSKYInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case CSKY::ST32B:
case CSKY::ST32H:
case CSKY::ST32W:
+ case CSKY::FST_S:
+ case CSKY::FST_D:
+ case CSKY::f2FST_S:
+ case CSKY::f2FST_D:
case CSKY::SPILL_CARRY:
break;
}
@@ -204,7 +406,15 @@ void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
} else if (CSKY::CARRYRegClass.hasSubClassEq(RC)) {
Opcode = CSKY::SPILL_CARRY;
CFI->setSpillsCR();
- } else {
+ } else if (v2sf && CSKY::sFPR32RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::FST_S;
+ else if (v2df && CSKY::sFPR64RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::FST_D;
+ else if (v3sf && CSKY::FPR32RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::f2FST_S;
+ else if (v3df && CSKY::FPR64RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::f2FST_D;
+ else {
llvm_unreachable("Unknown RegisterClass");
}
@@ -239,7 +449,15 @@ void CSKYInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
} else if (CSKY::CARRYRegClass.hasSubClassEq(RC)) {
Opcode = CSKY::RESTORE_CARRY;
CFI->setSpillsCR();
- } else {
+ } else if (v2sf && CSKY::sFPR32RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::FLD_S;
+ else if (v2df && CSKY::sFPR64RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::FLD_D;
+ else if (v3sf && CSKY::FPR32RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::f2FLD_S;
+ else if (v3df && CSKY::FPR64RegClass.hasSubClassEq(RC))
+ Opcode = CSKY::f2FLD_D;
+ else {
llvm_unreachable("Unknown RegisterClass");
}
@@ -302,6 +520,38 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Opcode = 0;
if (CSKY::GPRRegClass.contains(DestReg, SrcReg))
Opcode = CSKY::MOV32;
+ else if (v2sf && CSKY::sFPR32RegClass.contains(DestReg, SrcReg))
+ Opcode = CSKY::FMOV_S;
+ else if (v3sf && CSKY::FPR32RegClass.contains(DestReg, SrcReg))
+ Opcode = CSKY::f2FMOV_S;
+ else if (v2df && CSKY::sFPR64RegClass.contains(DestReg, SrcReg))
+ Opcode = CSKY::FMOV_D;
+ else if (v3df && CSKY::FPR64RegClass.contains(DestReg, SrcReg))
+ Opcode = CSKY::f2FMOV_D;
+ else if (v2sf && CSKY::sFPR32RegClass.contains(SrcReg) &&
+ CSKY::GPRRegClass.contains(DestReg))
+ Opcode = CSKY::FMFVRL;
+ else if (v3sf && CSKY::FPR32RegClass.contains(SrcReg) &&
+ CSKY::GPRRegClass.contains(DestReg))
+ Opcode = CSKY::f2FMFVRL;
+ else if (v2df && CSKY::sFPR64RegClass.contains(SrcReg) &&
+ CSKY::GPRRegClass.contains(DestReg))
+ Opcode = CSKY::FMFVRL_D;
+ else if (v3df && CSKY::FPR64RegClass.contains(SrcReg) &&
+ CSKY::GPRRegClass.contains(DestReg))
+ Opcode = CSKY::f2FMFVRL_D;
+ else if (v2sf && CSKY::GPRRegClass.contains(SrcReg) &&
+ CSKY::sFPR32RegClass.contains(DestReg))
+ Opcode = CSKY::FMTVRL;
+ else if (v3sf && CSKY::GPRRegClass.contains(SrcReg) &&
+ CSKY::FPR32RegClass.contains(DestReg))
+ Opcode = CSKY::f2FMTVRL;
+ else if (v2df && CSKY::GPRRegClass.contains(SrcReg) &&
+ CSKY::sFPR64RegClass.contains(DestReg))
+ Opcode = CSKY::FMTVRL_D;
+ else if (v3df && CSKY::GPRRegClass.contains(SrcReg) &&
+ CSKY::FPR64RegClass.contains(DestReg))
+ Opcode = CSKY::f2FMTVRL_D;
else {
LLVM_DEBUG(dbgs() << "src = " << SrcReg << ", dst = " << DestReg);
LLVM_DEBUG(I->dump());
@@ -311,3 +561,58 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Opcode), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
+
+Register CSKYInstrInfo::getGlobalBaseReg(MachineFunction &MF) const {
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ MachineConstantPool *MCP = MF.getConstantPool();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ Register GlobalBaseReg = CFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert a pseudo instruction to set the GlobalBaseReg into the first
+ // MBB of the function
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL;
+
+ CSKYConstantPoolValue *CPV = CSKYConstantPoolSymbol::Create(
+ Type::getInt32Ty(MF.getFunction().getContext()), "_GLOBAL_OFFSET_TABLE_",
+ 0, CSKYCP::ADDR);
+
+ unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
+
+ MachineMemOperand *MO =
+ MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, 4, Align(4));
+ BuildMI(FirstMBB, MBBI, DL, get(CSKY::LRW32), CSKY::R28)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(MO);
+
+ GlobalBaseReg = MRI.createVirtualRegister(&CSKY::GPRRegClass);
+ BuildMI(FirstMBB, MBBI, DL, get(TargetOpcode::COPY), GlobalBaseReg)
+ .addReg(CSKY::R28);
+
+ CFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
+
+unsigned CSKYInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
+ return MI.getDesc().getSize();
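+  // A constant-pool entry pseudo records its byte size as its third operand.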
+ case CSKY::CONSTPOOL_ENTRY:
+ return MI.getOperand(2).getImm();
+ case CSKY::SPILL_CARRY:
+ case CSKY::RESTORE_CARRY:
+ case CSKY::PseudoTLSLA32:
+ return 8;
+ case TargetOpcode::INLINEASM_BR:
+ case TargetOpcode::INLINEASM: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const char *AsmStr = MI.getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ }
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
index 450641d96b74..1a1bbbf9154f 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -24,6 +24,11 @@ namespace llvm {
class CSKYSubtarget;
class CSKYInstrInfo : public CSKYGenInstrInfo {
+ bool v2sf;
+ bool v2df;
+ bool v3sf;
+ bool v3df;
+
protected:
const CSKYSubtarget &STI;
@@ -50,6 +55,28 @@ public:
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
+
+ bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const override;
+
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+
+ bool
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
+
+ Register getGlobalBaseReg(MachineFunction &MF) const;
+
// Materializes the given integer Val into DstReg.
Register movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, int64_t Val,
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 30d9206eec68..a782efe7f4f4 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -15,22 +15,42 @@
// CSKY specific DAG Nodes.
//===----------------------------------------------------------------------===//
+// Target-independent type requirements, but with target-specific formats.
def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
SDTCisVT<1, i32>]>;
def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
SDTCisVT<1, i32>]>;
+def SDT_CSKYCall : SDTypeProfile<0, 2, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+
+def SDT_CSKYCallReg : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+
+def SDT_CSKY_LOADADDR : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, iPTR>, SDTCisVT<2, iPTR>]>;
+
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
-
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// Target-dependent nodes.
def CSKY_RET : SDNode<"CSKYISD::RET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def CSKY_CALL : SDNode<"CSKYISD::CALL", SDT_CSKYCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+
+def CSKY_CALLReg : SDNode<"CSKYISD::CALLReg", SDT_CSKYCallReg,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+
+def CSKY_TAIL : SDNode<"CSKYISD::TAIL", SDT_CSKYCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+
+def CSKY_TAILReg : SDNode<"CSKYISD::TAILReg", SDT_CSKYCallReg,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
+
+def CSKY_LOAD_ADDR : SDNode<"CSKYISD::LOAD_ADDR", SDT_CSKY_LOADADDR>;
+
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -57,6 +77,24 @@ def to_tframeindex : SDNodeXForm<frameindex, [{
return CurDAG->getTargetFrameIndex(FI->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout()));
}]>;
+def to_tconstpool : SDNodeXForm<constpool, [{
+ auto CP = cast<ConstantPoolSDNode>(N);
+ return CurDAG->getTargetConstantPool(CP->getConstVal(), TLI->getPointerTy(CurDAG->getDataLayout()),
+ CP->getAlign(), CP->getOffset(), CSKYII::MO_None);
+}]>;
+
+def to_tconstpool_hi16 : SDNodeXForm<constpool, [{
+ auto CP = cast<ConstantPoolSDNode>(N);
+ return CurDAG->getTargetConstantPool(CP->getConstVal(), TLI->getPointerTy(CurDAG->getDataLayout()),
+ CP->getAlign(), CP->getOffset(), CSKYII::MO_ADDR_HI16);
+}]>;
+
+def to_tconstpool_lo16 : SDNodeXForm<constpool, [{
+ auto CP = cast<ConstantPoolSDNode>(N);
+ return CurDAG->getTargetConstantPool(CP->getConstVal(), TLI->getPointerTy(CurDAG->getDataLayout()),
+ CP->getAlign(), CP->getOffset(), CSKYII::MO_ADDR_LO16);
+}]>;
+
class oimm<int num> : Operand<i32>,
ImmLeaf<i32, "return isUInt<"#num#">(Imm - 1);"> {
let EncoderMethod = "getOImmOpValue";
@@ -1055,6 +1093,178 @@ let Predicates = [iHas2E3] in {
def : Pat<(sext_inreg GPR:$src, i1), (SEXT32 GPR:$src, 0, 0)>;
def : Pat<(and GPR:$src, 255), (ZEXT32 GPR:$src, 7, 0)>;
def : Pat<(and GPR:$src, 65535), (ZEXT32 GPR:$src, 15, 0)>;
+
+ // Call Patterns
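+  // Calls and tail calls to symbols go through a constant-pool entry
+  // (JSRI32 / JMPI32); register-indirect calls use JSR32 / JMP32.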
+ def : Pat<(CSKY_CALL tglobaladdr, tconstpool:$src2), (JSRI32 tconstpool:$src2)>;
+ def : Pat<(CSKY_CALL texternalsym, tconstpool:$src2), (JSRI32 tconstpool:$src2)>;
+ def : Pat<(CSKY_TAIL tglobaladdr, tconstpool:$src2), (JMPI32 tconstpool:$src2)>;
+ def : Pat<(CSKY_TAIL texternalsym, tconstpool:$src2), (JMPI32 tconstpool:$src2)>;
+
+ def : Pat<(CSKY_CALLReg GPR:$src), (JSR32 GPR:$src)>;
+ def : Pat<(CSKY_TAILReg GPR:$src), (JMP32 GPR:$src)>;
+}
+
+// Symbol address Patterns
+def : Pat<(CSKY_LOAD_ADDR tglobaladdr, tconstpool:$src2), (LRW32 tconstpool:$src2)>;
+def : Pat<(CSKY_LOAD_ADDR tblockaddress, tconstpool:$src2), (LRW32 tconstpool:$src2)>;
+def : Pat<(CSKY_LOAD_ADDR tjumptable:$src1, tconstpool:$src2), (LRW32_Gen tjumptable:$src1, tconstpool:$src2)>;
+def : Pat<(CSKY_LOAD_ADDR texternalsym, tconstpool:$src2), (LRW32 tconstpool:$src2)>;
+
+let Predicates = [iHas2E3] in
+ def : Pat<(i32 constpool:$src), (GRS32 (to_tconstpool tconstpool:$src))>;
+
+let Predicates = [iHasE2] in
+ def : Pat<(i32 constpool:$src),
+ (ORI32 (MOVIH32 (to_tconstpool_hi16 tconstpool:$src)),
+ (to_tconstpool_lo16 tconstpool:$src))>;
+
+def : Pat<(i32 (load constpool:$src)), (LRW32 (to_tconstpool tconstpool:$src))>;
+
+// Branch Patterns.
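+// BT32 branches when the condition bit is set and BF32 when it is clear, so
+// the inverted senses (eq, ult, ge) reuse the same compare with BF32.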
+let Predicates = [iHasE2] in {
+ def : Pat<(brcond CARRY:$ca, bb:$imm16),
+ (BT32 CARRY:$ca, bb:$imm16)>;
+
+ def : Pat<(brcond (i32 (setne GPR:$rs1, uimm16:$rs2)), bb:$imm16),
+ (BT32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), bb:$imm16)>;
+ def : Pat<(brcond (i32 (seteq GPR:$rs1, uimm16:$rs2)), bb:$imm16),
+ (BF32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), bb:$imm16)>;
+ def : Pat<(brcond (i32 (setuge GPR:$rs1, oimm16:$rs2)), bb:$imm16),
+ (BT32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
+ def : Pat<(brcond (i32 (setult GPR:$rs1, oimm16:$rs2)), bb:$imm16),
+ (BF32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
+ def : Pat<(brcond (i32 (setlt GPR:$rs1, oimm16:$rs2)), bb:$imm16),
+ (BT32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
+ def : Pat<(brcond (i32 (setge GPR:$rs1, oimm16:$rs2)), bb:$imm16),
+ (BF32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
+
+}
+
+let Predicates = [iHas2E3] in {
+
+def : Pat<(brcond (i32 (setne GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BT32 (CMPNE32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
+def : Pat<(brcond (i32 (seteq GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BF32 (CMPNE32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
+def : Pat<(brcond (i32 (setuge GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BT32 (CMPHS32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
+def : Pat<(brcond (i32 (setule GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BT32 (CMPHS32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
+def : Pat<(brcond (i32 (setult GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BF32 (CMPHS32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
+def : Pat<(brcond (i32 (setugt GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BF32 (CMPHS32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
+def : Pat<(brcond (i32 (setlt GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BT32 (CMPLT32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
+def : Pat<(brcond (i32 (setgt GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BT32 (CMPLT32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
+def : Pat<(brcond (i32 (setge GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BF32 (CMPLT32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
+def : Pat<(brcond (i32 (setle GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (BF32 (CMPLT32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
+
+def : Pat<(brcond (i32 (seteq GPR:$rs1, (i32 0))), bb:$imm16),
+ (BEZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setne GPR:$rs1, (i32 0))), bb:$imm16),
+ (BNEZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setlt GPR:$rs1, (i32 0))), bb:$imm16),
+ (BLZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setge GPR:$rs1, (i32 0))), bb:$imm16),
+ (BHSZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setgt GPR:$rs1, (i32 0))), bb:$imm16),
+ (BHZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setle GPR:$rs1, (i32 0))), bb:$imm16),
+ (BLSZ32 GPR:$rs1, bb:$imm16)>;
+}
+
+// Compare Patterns.
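+// MVCV32 copies the inverted condition bit into a GPR, covering the setcc
+// senses that negate the hardware compares (eq, ult, ugt, ge, le).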
+let Predicates = [iHas2E3] in {
+ def : Pat<(setne GPR:$rs1, GPR:$rs2),
+ (CMPNE32 GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(i32 (seteq GPR:$rs1, GPR:$rs2)),
+ (MVCV32 (CMPNE32 GPR:$rs1, GPR:$rs2))>;
+ def : Pat<(setuge GPR:$rs1, GPR:$rs2),
+ (CMPHS32 GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(setule GPR:$rs1, GPR:$rs2),
+ (CMPHS32 GPR:$rs2, GPR:$rs1)>;
+ def : Pat<(i32 (setult GPR:$rs1, GPR:$rs2)),
+ (MVCV32 (CMPHS32 GPR:$rs1, GPR:$rs2))>;
+ def : Pat<(i32 (setugt GPR:$rs1, GPR:$rs2)),
+ (MVCV32 (CMPHS32 GPR:$rs2, GPR:$rs1))>;
+ def : Pat<(setlt GPR:$rs1, GPR:$rs2),
+ (CMPLT32 GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(setgt GPR:$rs1, GPR:$rs2),
+ (CMPLT32 GPR:$rs2, GPR:$rs1)>;
+ def : Pat<(i32 (setge GPR:$rs1, GPR:$rs2)),
+ (MVCV32 (CMPLT32 GPR:$rs1, GPR:$rs2))>;
+ def : Pat<(i32 (setle GPR:$rs1, GPR:$rs2)),
+ (MVCV32 (CMPLT32 GPR:$rs2, GPR:$rs1))>;
+}
+
+let Predicates = [iHasE2] in {
+ def : Pat<(setne GPR:$rs1, uimm16:$rs2),
+ (CMPNEI32 GPR:$rs1, uimm16:$rs2)>;
+ let Predicates = [iHas2E3] in
+ def : Pat<(i32 (seteq GPR:$rs1, uimm16:$rs2)),
+ (MVCV32 (CMPNEI32 GPR:$rs1, uimm16:$rs2))>;
+ def : Pat<(setuge GPR:$rs1, oimm16:$rs2),
+ (CMPHSI32 GPR:$rs1, oimm16:$rs2)>;
+ let Predicates = [iHas2E3] in
+ def : Pat<(i32 (setult GPR:$rs1, oimm16:$rs2)),
+ (MVCV32 (CMPHSI32 GPR:$rs1, oimm16:$rs2))>;
+ def : Pat<(setlt GPR:$rs1, oimm16:$rs2),
+ (CMPLTI32 GPR:$rs1, oimm16:$rs2)>;
+ let Predicates = [iHas2E3] in
+ def : Pat<(i32 (setge GPR:$rs1, oimm16:$rs2)),
+ (MVCV32 (CMPLTI32 GPR:$rs1, oimm16:$rs2))>;
+}
+
+// Select Patterns.
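+// MOVT32 selects $rx when the condition bit is set (and $false otherwise);
+// MOVF32 is the inverse, mirroring the BT32/BF32 split above.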
+let Predicates = [iHasE2] in {
+def : Pat<(select CARRY:$ca, GPR:$rx, GPR:$false),
+ (MOVT32 CARRY:$ca, GPR:$rx, GPR:$false)>;
+def : Pat<(select (and CARRY:$ca, 1), GPR:$rx, GPR:$false),
+ (MOVT32 CARRY:$ca, GPR:$rx, GPR:$false)>;
+
+def : Pat<(select (i32 (setne GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setne GPR:$rs1, uimm16:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (seteq GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (seteq GPR:$rs1, uimm16:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$rx, GPR:$false)>;
+
+def : Pat<(select (i32 (setuge GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPHS32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setuge GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setule GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPHS32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setult GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPHS32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setult GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setugt GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPHS32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
+
+def : Pat<(select (i32 (setlt GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPLT32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setlt GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setgt GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (CMPLT32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setge GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPLT32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setge GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
+def : Pat<(select (i32 (setle GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (CMPLT32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
+
+def : Pat<(select CARRY:$ca, GPR:$rx, GPR:$false),
+ (ISEL32 CARRY:$ca, GPR:$rx, GPR:$false)>;
+def : Pat<(select (and CARRY:$ca, 1), GPR:$rx, GPR:$false),
+ (ISEL32 CARRY:$ca, GPR:$rx, GPR:$false)>;
}
// Constant materialize patterns.
@@ -1150,3 +1360,5 @@ def CONSTPOOL_ENTRY : CSKYPseudo<(outs),
(ins i32imm:$instid, i32imm:$cpidx, i32imm:$size), "", []>;
include "CSKYInstrInfo16Instr.td"
+include "CSKYInstrInfoF1.td"
+include "CSKYInstrInfoF2.td"
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td b/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
new file mode 100644
index 000000000000..30cef024f35a
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
@@ -0,0 +1,420 @@
+//===- CSKYInstrInfoF1.td - CSKY Instruction Float1.0 ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY Float1.0 (FPUv2) instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
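+// Register-sequence operands pair a starting FPU register with a uimm5 field;
+// they model the register lists of the fldm/fstm-style instructions below.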
+def regseq_f1 : Operand<iPTR> {
+ let EncoderMethod = "getRegisterSeqOpValue";
+ let ParserMatchClass = RegSeqAsmOperand<"V1">;
+ let PrintMethod = "printRegisterSeq";
+ let DecoderMethod = "DecodeRegSeqOperandF1";
+ let MIOperandInfo = (ops sFPR32, uimm5);
+}
+
+def regseq_d1 : Operand<iPTR> {
+ let EncoderMethod = "getRegisterSeqOpValue";
+ let ParserMatchClass = RegSeqAsmOperand<"V1">;
+ let PrintMethod = "printRegisterSeq";
+ let DecoderMethod = "DecodeRegSeqOperandD1";
+ let MIOperandInfo = (ops sFPR64, uimm5);
+}
+
+def sFPR32Op : RegisterOperand<sFPR32, "printFPR">;
+def sFPR64Op : RegisterOperand<sFPR64, "printFPR">;
+def sFPR64_V_OP : RegisterOperand<sFPR64_V, "printFPR">;
+
+include "CSKYInstrFormatsF1.td"
+
+//===----------------------------------------------------------------------===//
+// CSKY specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_BITCAST_TO_LOHI : SDTypeProfile<2, 1, [SDTCisSameAs<0, 1>]>;
+def CSKY_BITCAST_TO_LOHI : SDNode<"CSKYISD::BITCAST_TO_LOHI", SDT_BITCAST_TO_LOHI>;
+def SDT_BITCAST_FROM_LOHI : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
+def CSKY_BITCAST_FROM_LOHI : SDNode<"CSKYISD::BITCAST_FROM_LOHI", SDT_BITCAST_FROM_LOHI>;
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
+def fpimm32_hi16 : SDNodeXForm<fpimm, [{
+ return CurDAG->getTargetConstant(
+ (N->getValueAPF().bitcastToAPInt().getZExtValue() >> 16) & 0xFFFF,
+ SDLoc(N), MVT::i32);
+}]>;
+
+def fpimm32_lo16 : SDNodeXForm<fpimm, [{
+ return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue() & 0xFFFF,
+ SDLoc(N), MVT::i32);
+}]>;
+
+class fpimm_xform<int width, int shift = 0> : SDNodeXForm<fpimm,
+ "return CurDAG->getTargetConstant(N->getValueAPF().bitcastToAPInt().lshr("#shift#").getLoBits("#width#"), SDLoc(N), MVT::i32);">;
+
+class fpimm_xform_i16<int width, int shift = 0> : SDNodeXForm<fpimm,
+ "return CurDAG->getTargetConstant(N->getValueAPF().bitcastToAPInt().lshr("#shift#").getLoBits("#width#"), SDLoc(N), MVT::i16);">;
+
+class fpimm_t<int width, int shift = 0> : PatLeaf<(fpimm),
+ "return isShiftedUInt<"#width#", "#shift#">(N->getValueAPF().bitcastToAPInt().getZExtValue());">;
+
+def fpimm8 : fpimm_t<8>;
+def fpimm8_8 : fpimm_t<8, 8>;
+def fpimm8_16 : fpimm_t<8, 16>;
+def fpimm8_24 : fpimm_t<8, 24>;
+def fpimm16 : fpimm_t<16>;
+def fpimm16_8 : fpimm_t<16, 8>;
+def fpimm16_16 : fpimm_t<16, 16>;
+def fpimm24 : fpimm_t<24>;
+def fpimm24_8 : fpimm_t<24, 8>;
+def fpimm32 : fpimm_t<32>;
+
+def fpimm8_sr0_XFORM : fpimm_xform<8>;
+def fpimm8_sr8_XFORM : fpimm_xform<8, 8>;
+def fpimm8_sr16_XFORM : fpimm_xform<8, 16>;
+def fpimm8_sr24_XFORM : fpimm_xform<8, 24>;
+
+def fpimm8_sr0_i16_XFORM : fpimm_xform_i16<8>;
+def fpimm8_sr8_i16_XFORM : fpimm_xform_i16<8, 8>;
+
+def fconstpool_symbol : Operand<iPTR> {
+ let ParserMatchClass = Constpool;
+ let EncoderMethod =
+ "getConstpoolSymbolOpValue<CSKY::fixup_csky_pcrel_uimm8_scale4>";
+ let DecoderMethod = "decodeUImmOperand<8, 2>";
+ let PrintMethod = "printConstpool";
+ let OperandType = "OPERAND_PCREL";
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Arithmetic
+
+def FABSM : F_XZ<0x2, 0b000110, "fabsm", "", UnOpFrag<(fabs node:$Src)>, sFPR64_V_OP>;
+def FNEGM : F_XZ<0x2, 0b000111, "fnegm", "", UnOpFrag<(fneg node:$Src)>, sFPR64_V_OP>;
+def FADDM : F_XYZ<0x2, 0b000000, "faddm", "", BinOpFrag<(fadd node:$LHS, node:$RHS)>, sFPR64_V_OP>;
+def FSUBM : F_XYZ<0x2, 0b000001, "fsubm", "", BinOpFrag<(fsub node:$LHS, node:$RHS)>, sFPR64_V_OP>;
+def FMULM : F_XYZ<0x2, 0b010000, "fmulm", "", BinOpFrag<(fmul node:$LHS, node:$RHS)>, sFPR64_V_OP>;
+def FNMULM : F_XYZ<0x2, 0b010001, "fnmulm", "", BinOpFrag<(fneg (fmul node:$LHS, node:$RHS))>, sFPR64_V_OP>;
+def FMACM : F_ACCUM_XYZ<0x2, 0b010100, "fmacm", "", TriOpFrag<(fadd node:$LHS, (fmul node:$MHS, node:$RHS))>, sFPR64_V_OP>;
+def FMSCM : F_ACCUM_XYZ<0x2, 0b010101, "fmscm", "", TriOpFrag<(fsub (fmul node:$MHS, node:$RHS), node:$LHS)>, sFPR64_V_OP>;
+def FNMACM : F_ACCUM_XYZ<0x2, 0b010110, "fnmacm", "", TriOpFrag<(fsub node:$LHS, (fmul node:$MHS, node:$RHS))>, sFPR64_V_OP>;
+def FNMSCM : F_ACCUM_XYZ<0x2, 0b010111, "fnmscm", "", TriOpFrag<(fneg (fadd node:$LHS, (fmul node:$MHS, node:$RHS)))>, sFPR64_V_OP>;
+
+def FMOVM : F_MOV<0x2, 0b000100, "fmovm", "", sFPR64_V_OP>;
+
+defm FABS : FT_XZ<0b000110, "fabs", UnOpFrag<(fabs node:$Src)>>;
+defm FNEG : FT_XZ<0b000111, "fneg", UnOpFrag<(fneg node:$Src)>>;
+defm FSQRT : FT_XZ<0b011010, "fsqrt", UnOpFrag<(fsqrt node:$Src)>>;
+
+defm FADD : FT_XYZ<0b000000, "fadd", BinOpFrag<(fadd node:$LHS, node:$RHS)>>;
+defm FSUB : FT_XYZ<0b000001, "fsub", BinOpFrag<(fsub node:$LHS, node:$RHS)>>;
+defm FDIV : FT_XYZ<0b011000, "fdiv", BinOpFrag<(fdiv node:$LHS, node:$RHS)>>;
+defm FMUL : FT_XYZ<0b010000, "fmul", BinOpFrag<(fmul node:$LHS, node:$RHS)>>;
+defm FNMUL : FT_XYZ<0b010001, "fnmul", BinOpFrag<(fneg (fmul node:$LHS, node:$RHS))>>;
+defm FMAC : FT_ACCUM_XYZ<0b010100, "fmac", TriOpFrag<(fadd node:$LHS, (fmul node:$MHS, node:$RHS))>>;
+defm FMSC : FT_ACCUM_XYZ<0b010101, "fmsc", TriOpFrag<(fsub (fmul node:$MHS, node:$RHS), node:$LHS)>>;
+defm FNMAC : FT_ACCUM_XYZ<0b010110, "fnmac", TriOpFrag<(fsub node:$LHS, (fmul node:$MHS, node:$RHS))>>;
+defm FNMSC : FT_ACCUM_XYZ<0b010111, "fnmsc", TriOpFrag<(fneg (fadd node:$LHS, (fmul node:$MHS, node:$RHS)))>>;
+
+defm FCMPHS : FT_CMPXY<0b001100, "fcmphs">;
+defm FCMPLT : FT_CMPXY<0b001101, "fcmplt">;
+defm FCMPNE : FT_CMPXY<0b001110, "fcmpne">;
+defm FCMPUO : FT_CMPXY<0b001111, "fcmpuo">;
+defm FCMPZHS : FT_CMPZX<0b001000, "fcmpzhs">;
+defm FCMPZLS : FT_CMPZX<0b001001, "fcmpzls">;
+defm FCMPZNE : FT_CMPZX<0b001010, "fcmpzne">;
+defm FCMPZUO : FT_CMPZX<0b001011, "fcmpzuo">;
+
+defm FRECIP : FT_MOV<0b011001, "frecip">;
+
+// fmov, fmtvr, fmfvr
+defm FMOV : FT_MOV<0b000100, "fmov">;
+def FMFVRL : F_XZ_GF<3, 0b011001, (outs GPR:$rz), (ins sFPR32Op:$vrx),
+ "fmfvrl\t$rz, $vrx", [(set GPR:$rz, (bitconvert sFPR32Op:$vrx))]>;
+def FMTVRL : F_XZ_FG<3, 0b011011, (outs sFPR32Op:$vrz), (ins GPR:$rx),
+ "fmtvrl\t$vrz, $rx", [(set sFPR32Op:$vrz, (bitconvert GPR:$rx))]>;
+
+let Predicates = [HasFPUv2_DF] in {
+ let isCodeGenOnly = 1 in
+ def FMFVRL_D : F_XZ_GF<3, 0b011001, (outs GPR:$rz), (ins sFPR64Op:$vrx),
+ "fmfvrl\t$rz, $vrx", []>;
+ def FMFVRH_D : F_XZ_GF<3, 0b011000, (outs GPR:$rz), (ins sFPR64Op:$vrx),
+ "fmfvrh\t$rz, $vrx", []>;
+ let isCodeGenOnly = 1 in
+ def FMTVRL_D : F_XZ_FG<3, 0b011011, (outs sFPR64Op:$vrz), (ins GPR:$rx),
+ "fmtvrl\t$vrz, $rx", []>;
+let Constraints = "$vrZ = $vrz" in
+ def FMTVRH_D : F_XZ_FG<3, 0b011010, (outs sFPR64Op:$vrz), (ins sFPR64Op:$vrZ, GPR:$rx),
+ "fmtvrh\t$vrz, $rx", []>;
+}
+
+// fcvt
+
+def FSITOS : F_XZ_TRANS<0b010000, "fsitos", sFPR32Op, sFPR32Op>;
+def : Pat<(f32 (sint_to_fp GPR:$a)),
+ (FSITOS (COPY_TO_REGCLASS GPR:$a, sFPR32))>,
+ Requires<[HasFPUv2_SF]>;
+
+def FUITOS : F_XZ_TRANS<0b010001, "fuitos", sFPR32Op, sFPR32Op>;
+def : Pat<(f32 (uint_to_fp GPR:$a)),
+ (FUITOS (COPY_TO_REGCLASS GPR:$a, sFPR32))>,
+ Requires<[HasFPUv2_SF]>;
+
+def FSITOD : F_XZ_TRANS<0b010100, "fsitod", sFPR64Op, sFPR64Op>;
+def : Pat<(f64 (sint_to_fp GPR:$a)),
+ (FSITOD (COPY_TO_REGCLASS GPR:$a, sFPR64))>,
+ Requires<[HasFPUv2_DF]>;
+
+def FUITOD : F_XZ_TRANS<0b010101, "fuitod", sFPR64Op, sFPR64Op>;
+def : Pat<(f64 (uint_to_fp GPR:$a)),
+ (FUITOD (COPY_TO_REGCLASS GPR:$a, sFPR64))>,
+ Requires<[HasFPUv2_DF]>;
+
+let Predicates = [HasFPUv2_DF] in {
+def FDTOS : F_XZ_TRANS_DS<0b010110,"fdtos", UnOpFrag<(fpround node:$Src)>>;
+def FSTOD : F_XZ_TRANS_SD<0b010111,"fstod", UnOpFrag<(fpextend node:$Src)>>;
+}
+
+def rpiFSTOSI : F_XZ_TRANS<0b000010, "fstosi.rpi", sFPR32Op, sFPR32Op>;
+def rpiFSTOUI : F_XZ_TRANS<0b000110, "fstoui.rpi", sFPR32Op, sFPR32Op>;
+def rzFSTOSI : F_XZ_TRANS<0b000001, "fstosi.rz", sFPR32Op, sFPR32Op>;
+def rzFSTOUI : F_XZ_TRANS<0b000101, "fstoui.rz", sFPR32Op, sFPR32Op>;
+def rnFSTOSI : F_XZ_TRANS<0b000000, "fstosi.rn", sFPR32Op, sFPR32Op>;
+def rnFSTOUI : F_XZ_TRANS<0b000100, "fstoui.rn", sFPR32Op, sFPR32Op>;
+def rniFSTOSI : F_XZ_TRANS<0b000011, "fstosi.rni", sFPR32Op, sFPR32Op>;
+def rniFSTOUI : F_XZ_TRANS<0b000111, "fstoui.rni", sFPR32Op, sFPR32Op>;
+
+let Predicates = [HasFPUv2_DF] in {
+def rpiFDTOSI : F_XZ_TRANS<0b001010, "fdtosi.rpi", sFPR64Op, sFPR64Op>;
+def rpiFDTOUI : F_XZ_TRANS<0b001110, "fdtoui.rpi", sFPR64Op, sFPR64Op>;
+def rzFDTOSI : F_XZ_TRANS<0b001001, "fdtosi.rz", sFPR64Op, sFPR64Op>;
+def rzFDTOUI : F_XZ_TRANS<0b001101, "fdtoui.rz", sFPR64Op, sFPR64Op>;
+def rnFDTOSI : F_XZ_TRANS<0b001000, "fdtosi.rn", sFPR64Op, sFPR64Op>;
+def rnFDTOUI : F_XZ_TRANS<0b001100, "fdtoui.rn", sFPR64Op, sFPR64Op>;
+def rniFDTOSI : F_XZ_TRANS<0b001011, "fdtosi.rni", sFPR64Op, sFPR64Op>;
+def rniFDTOUI : F_XZ_TRANS<0b001111, "fdtoui.rni", sFPR64Op, sFPR64Op>;
+}
+
+multiclass FPToIntegerPats<SDNode round, string SUFFIX> {
+ def : Pat<(i32 (fp_to_sint (round sFPR32Op:$Rn))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOSI) sFPR32Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_SF]>;
+ def : Pat<(i32 (fp_to_uint (round sFPR32Op:$Rn))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOUI) sFPR32Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_SF]>;
+ def : Pat<(i32 (fp_to_sint (round sFPR64Op:$Rn))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOSI) sFPR64Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_DF]>;
+ def : Pat<(i32 (fp_to_uint (round sFPR64Op:$Rn))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOUI) sFPR64Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_DF]>;
+}
+
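+// Map ceil/round/floor followed by fp-to-int onto the conversions with an
+// explicit rounding mode: .rpi (toward +inf), .rn (nearest), .rni (toward -inf).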
+defm: FPToIntegerPats<fceil, "rpi">;
+defm: FPToIntegerPats<fround, "rn">;
+defm: FPToIntegerPats<ffloor, "rni">;
+
+multiclass FPToIntegerTowardszeroPats<string SUFFIX> {
+ def : Pat<(i32 (fp_to_sint sFPR32Op:$Rn)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOSI) sFPR32Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_SF]>;
+ def : Pat<(i32 (fp_to_uint sFPR32Op:$Rn)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOUI) sFPR32Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_SF]>;
+ def : Pat<(i32 (fp_to_sint sFPR64Op:$Rn)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOSI) sFPR64Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_DF]>;
+ def : Pat<(i32 (fp_to_uint sFPR64Op:$Rn)),
+ (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOUI) sFPR64Op:$Rn), GPR)>,
+ Requires<[HasFPUv2_DF]>;
+}
+
+defm: FPToIntegerTowardszeroPats<"rz">;
+
+
+// fld, fst
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ defm FLD : FT_XYAI_LD<0b0010000, "fld">;
+ defm FLDR : FT_XYAR_LD<0b0010100, "fldr">;
+ defm FLDM : FT_XYAR_LDM<0b0011000, "fldm">;
+
+ let Predicates = [HasFPUv2_DF] in
+ def FLDRM : F_XYAR_LD<0b0010101, 0, "fldrm", "", sFPR64Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def FLDMM : F_I4_XY_MEM<0b0011001, 0,
+ (outs), (ins GPR:$rx, regseq_d1:$regs, variable_ops), "fldmm\t$regs, (${rx})", []>;
+ let Predicates = [HasFPUv2_DF] in
+ def FLDM : F_XYAI_LD<0b0010001, 0, "fldm", "", sFPR64Op, uimm8_3>;
+}
+
+
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ defm FST : FT_XYAI_ST<0b0010010, "fst">;
+ defm FSTR : FT_XYAR_ST<0b0010110, "fstr">;
+ defm FSTM : FT_XYAR_STM<0b0011010, "fstm">;
+
+ let Predicates = [HasFPUv2_DF] in
+ def FSTRM : F_XYAR_ST<0b0010111, 0, "fstrm", "", sFPR64Op>;
+ let Predicates = [HasFPUv2_DF] in
+ def FSTMM : F_I4_XY_MEM<0b0011011, 0,
+ (outs), (ins GPR:$rx, regseq_d1:$regs, variable_ops), "fstmm\t$regs, (${rx})", []>;
+ let Predicates = [HasFPUv2_DF] in
+ def FSTM : F_XYAI_ST<0b0010011, 0, "fstm", "", sFPR64Op, uimm8_3>;
+}
+
+defm : LdPat<load, uimm8_2, FLD_S, f32>, Requires<[HasFPUv2_SF]>;
+defm : LdPat<load, uimm8_2, FLD_D, f64>, Requires<[HasFPUv2_DF]>;
+defm : LdrPat<load, FLDR_S, f32>, Requires<[HasFPUv2_SF]>;
+defm : LdrPat<load, FLDR_D, f64>, Requires<[HasFPUv2_DF]>;
+
+defm : StPat<store, f32, uimm8_2, FST_S>, Requires<[HasFPUv2_SF]>;
+defm : StPat<store, f64, uimm8_2, FST_D>, Requires<[HasFPUv2_DF]>;
+defm : StrPat<store, f32, FSTR_S>, Requires<[HasFPUv2_SF]>;
+defm : StrPat<store, f64, FSTR_D>, Requires<[HasFPUv2_DF]>;
+
+
+def : Pat<(f32 fpimm16:$imm), (COPY_TO_REGCLASS (MOVI32 (fpimm32_lo16 fpimm16:$imm)), sFPR32)>,
+ Requires<[HasFPUv2_SF]>;
+def : Pat<(f32 fpimm16_16:$imm), (f32 (COPY_TO_REGCLASS (MOVIH32 (fpimm32_hi16 fpimm16_16:$imm)), sFPR32))>,
+ Requires<[HasFPUv2_SF]>;
+def : Pat<(f32 fpimm:$imm), (COPY_TO_REGCLASS (ORI32 (MOVIH32 (fpimm32_hi16 fpimm:$imm)), (fpimm32_lo16 fpimm:$imm)), sFPR32)>,
+ Requires<[HasFPUv2_SF]>;
+
+def : Pat<(f64 (CSKY_BITCAST_FROM_LOHI GPR:$rs1, GPR:$rs2)), (FMTVRH_D (FMTVRL_D GPR:$rs1), GPR:$rs2)>,
+ Requires<[HasFPUv2_DF]>;
+
+multiclass BRCond_Bin<CondCode CC, string Instr, Instruction Br, Instruction MV> {
+ let Predicates = [HasFPUv2_SF] in
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_S) sFPR32Op:$rs1, sFPR32Op:$rs2), bb:$imm16)>;
+ let Predicates = [HasFPUv2_DF] in
+ def : Pat<(brcond (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2), bb:$imm16)>;
+
+ let Predicates = [HasFPUv2_SF] in
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_S) sFPR32Op:$rs1, sFPR32Op:$rs2))>;
+ let Predicates = [HasFPUv2_DF] in
+ def : Pat<(i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2))>;
+}
+
+multiclass BRCond_Bin_SWAP<CondCode CC, string Instr, Instruction Br, Instruction MV> {
+ let Predicates = [HasFPUv2_SF] in
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_S) sFPR32Op:$rs2, sFPR32Op:$rs1), bb:$imm16)>;
+ let Predicates = [HasFPUv2_DF] in
+ def : Pat<(brcond (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_D) sFPR64Op:$rs2, sFPR64Op:$rs1), bb:$imm16)>;
+
+ let Predicates = [HasFPUv2_SF] in
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_S) sFPR32Op:$rs2, sFPR32Op:$rs1))>;
+ let Predicates = [HasFPUv2_DF] in
+ def : Pat<(i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_D) sFPR64Op:$rs2, sFPR64Op:$rs1))>;
+}
+
+// Rewrite ordered conditions: invert (ordered && compare) into (unordered || inverse(compare)).
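+// For example, SETOEQ is the inverse of SETUNE, so it reuses FCMPNE and takes
+// the inverted branch/carry move (BF32 / MVCV32).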
+
+defm : BRCond_Bin<SETUNE, "FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin<SETOEQ, "FCMPNE", BF32, MVCV32>;
+defm : BRCond_Bin<SETOGE, "FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin<SETOLT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin<SETUO, "FCMPUO", BT32, MVC32>;
+defm : BRCond_Bin<SETO, "FCMPUO", BF32, MVCV32>;
+defm : BRCond_Bin_SWAP<SETOGT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP<SETOLE, "FCMPHS", BT32, MVC32>;
+
+defm : BRCond_Bin<SETNE, "FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin<SETEQ, "FCMPNE", BF32, MVCV32>;
+defm : BRCond_Bin<SETGE, "FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin<SETLT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP<SETGT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP<SETLE, "FCMPHS", BT32, MVC32>;
+
+// -----------
+
+let Predicates = [HasFPUv2_SF] in {
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGE)), bb:$imm16),
+ (BT32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGE)),
+ (MVC32 (FCMPZHS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLT)), bb:$imm16),
+ (BF32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLT)),
+ (MVCV32 (FCMPZHS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLE)), bb:$imm16),
+ (BT32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLE)),
+ (MVC32 (FCMPZLS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGT)), bb:$imm16),
+ (BF32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGT)),
+ (MVCV32 (FCMPZLS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETUNE)), bb:$imm16),
+ (BT32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETUNE)),
+ (MVC32 (FCMPZNE_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOEQ)), bb:$imm16),
+ (BF32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOEQ)),
+ (MVCV32 (FCMPZNE_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm, SETUO)), bb:$imm16),
+ (BT32 (FCMPZUO_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm, SETUO)),
+ (MVC32 (FCMPZUO_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm, SETO)), bb:$imm16),
+ (BF32 (FCMPZUO_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm, SETO)),
+ (MVCV32 (FCMPZUO_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETGE)), bb:$imm16),
+ (BT32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETGE)),
+ (MVC32 (FCMPZHS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETLT)), bb:$imm16),
+ (BF32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETLT)),
+ (MVCV32 (FCMPZHS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETLE)), bb:$imm16),
+ (BT32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETLE)),
+ (MVC32 (FCMPZLS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETGT)), bb:$imm16),
+ (BF32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETGT)),
+ (MVCV32 (FCMPZLS_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETNE)), bb:$imm16),
+ (BT32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETNE)),
+ (MVC32 (FCMPZNE_S sFPR32Op:$rs1))>;
+ def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETEQ)), bb:$imm16),
+ (BF32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETEQ)),
+ (MVCV32 (FCMPZNE_S sFPR32Op:$rs1))>;
+}
+
+let usesCustomInserter = 1 in {
+ let Predicates = [HasFPUv2_SF] in
+ def FSELS : CSKYPseudo<(outs sFPR32Op:$dst), (ins CARRY:$cond, sFPR32Op:$src1, sFPR32Op:$src2),
+ "!fsels\t$dst, $src1, $src2", [(set sFPR32Op:$dst, (select CARRY:$cond, sFPR32Op:$src1, sFPR32Op:$src2))]>;
+
+ let Predicates = [HasFPUv2_DF] in
+ def FSELD : CSKYPseudo<(outs sFPR64Op:$dst), (ins CARRY:$cond, sFPR64Op:$src1, sFPR64Op:$src2),
+ "!fseld\t$dst, $src1, $src2", [(set sFPR64Op:$dst, (select CARRY:$cond, sFPR64Op:$src1, sFPR64Op:$src2))]>;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td b/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
new file mode 100644
index 000000000000..8a00e7d9af3a
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
@@ -0,0 +1,462 @@
+//===- CSKYInstrInfoF2.td - CSKY Instruction Float2.0 ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY Float2.0 floating-point instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def regseq_f2 : Operand<i32> {
+ let EncoderMethod = "getRegisterSeqOpValue";
+ let ParserMatchClass = RegSeqAsmOperand<"V2">;
+ let PrintMethod = "printRegisterSeq";
+ let DecoderMethod = "DecodeRegSeqOperandF2";
+ let MIOperandInfo = (ops FPR32, uimm5);
+}
+
+def regseq_d2 : Operand<i32> {
+ let EncoderMethod = "getRegisterSeqOpValue";
+ let ParserMatchClass = RegSeqAsmOperand<"V2">;
+ let PrintMethod = "printRegisterSeq";
+ let DecoderMethod = "DecodeRegSeqOperandD2";
+ let MIOperandInfo = (ops FPR64, uimm5);
+}
+
+def FPR32Op : RegisterOperand<FPR32, "printFPR">;
+def FPR64Op : RegisterOperand<FPR64, "printFPR">;
+
+include "CSKYInstrFormatsF2.td"
+
+// Predicates
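+// IsOrAdd matches an 'or' that the isOrEquivalentToAdd helper shows can be
+// treated as an 'add' (typically because the operands share no set bits).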
+def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
+ return isOrEquivalentToAdd(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+defm f2FADD : F2_XYZ_T<0b000000, "fadd", BinOpFrag<(fadd node:$LHS, node:$RHS)>>;
+defm f2FSUB : F2_XYZ_T<0b000001, "fsub", BinOpFrag<(fsub node:$LHS, node:$RHS)>>;
+defm f2FDIV : F2_XYZ_T<0b011000, "fdiv", BinOpFrag<(fdiv node:$LHS, node:$RHS)>>;
+defm f2FMUL : F2_XYZ_T<0b010000, "fmul", BinOpFrag<(fmul node:$LHS, node:$RHS)>>;
+
+defm f2FMAXNM : F2_XYZ_T<0b101000, "fmaxnm", BinOpFrag<(fmaxnum node:$LHS, node:$RHS)>>;
+defm f2FMINNM : F2_XYZ_T<0b101001, "fminnm", BinOpFrag<(fminnum node:$LHS, node:$RHS)>>;
+
+defm f2FABS : F2_XZ_T<0b000110, "fabs", fabs>;
+defm f2FNEG : F2_XZ_T<0b000111, "fneg", fneg>;
+defm f2FSQRT : F2_XZ_T<0b011010, "fsqrt", fsqrt>;
+defm f2FMOV : F2_XZ_SET_T<0b000100, "fmov">;
+def f2FMOVX : F2_XZ_SET<0b00001, FPR32Op, 0b000101, "fmovx.32">;
+
+defm f2RECIP : F2_XZ_SET_T<0b011001, "frecip">;
+
+// fld/fst
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def f2FLD_S : F2_LDST_S<0b0, "fld", (outs FPR32Op:$vrz), (ins GPR:$rx, uimm8_2:$imm8)>;
+ let Predicates = [HasFPUv3_DF] in
+ def f2FLD_D : F2_LDST_D<0b0, "fld", (outs FPR64Op:$vrz), (ins GPR:$rx, uimm8_2:$imm8)>;
+}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ def f2FST_S : F2_LDST_S<0b1, "fst", (outs), (ins FPR32Op:$vrz, GPR:$rx, uimm8_2:$imm8)>;
+ let Predicates = [HasFPUv3_DF] in
+ def f2FST_D : F2_LDST_D<0b1, "fst", (outs), (ins FPR64Op:$vrz, GPR:$rx, uimm8_2:$imm8)>;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ def f2FSTM_S : F2_LDSTM_S<0b1, 0, "fstm", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+ let Predicates = [HasFPUv3_DF] in
+ def f2FSTM_D : F2_LDSTM_D<0b1, 0, "fstm", (outs), (ins GPR:$rx, regseq_d2:$regs, variable_ops)>;
+
+ def f2FSTMU_S : F2_LDSTM_S<0b1, 0b100, "fstmu", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+ let Predicates = [HasFPUv3_DF] in
+ def f2FSTMU_D : F2_LDSTM_D<0b1, 0b100, "fstmu", (outs), (ins GPR:$rx, regseq_d2:$regs, variable_ops)>;
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def f2FLDM_S : F2_LDSTM_S<0b0, 0, "fldm", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+ let Predicates = [HasFPUv3_DF] in
+ def f2FLDM_D : F2_LDSTM_D<0b0, 0, "fldm", (outs), (ins GPR:$rx, regseq_d2:$regs, variable_ops)>;
+
+ def f2FLDMU_S : F2_LDSTM_S<0b0, 0b100, "fldmu", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+ let Predicates = [HasFPUv3_DF] in
+ def f2FLDMU_D : F2_LDSTM_D<0b0, 0b100, "fldmu", (outs), (ins GPR:$rx, regseq_d2:$regs, variable_ops)>;
+}
+
+multiclass FLSR {
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def FLDR_S : F2_LDSTR_S<0b0, "fldr", (outs FPR32Op:$rz), (ins GPR:$rx, GPR:$ry, uimm2:$imm)>;
+ let Predicates = [HasFPUv3_DF] in
+ def FLDR_D : F2_LDSTR_D<0b0, "fldr", (outs FPR64Op:$rz), (ins GPR:$rx, GPR:$ry, uimm2:$imm)>;
+ }
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ def FSTR_S : F2_LDSTR_S<0b1, "fstr", (outs), (ins FPR32Op:$rz, GPR:$rx, GPR:$ry, uimm2:$imm)>;
+ let Predicates = [HasFPUv3_DF] in
+ def FSTR_D : F2_LDSTR_D<0b1, "fstr", (outs), (ins FPR64Op:$rz, GPR:$rx, GPR:$ry, uimm2:$imm)>;
+ }
+}
+
+defm f2: FLSR;
+
+def f2FLRW_S : F2_LRW<0b00, 0b0, "flrw.32", (outs FPR32Op:$vrz), (ins fconstpool_symbol:$imm8)>;
+def f2FLRW_D : F2_LRW<0b01, 0b0, "flrw.64", (outs FPR64Op:$vrz), (ins fconstpool_symbol:$imm8)>;
+
+def : Pat<(f32 (load constpool:$src)), (f2FLRW_S (to_tconstpool tconstpool:$src))>, Requires<[HasFPUv3_SF]>;
+def : Pat<(f64 (load constpool:$src)), (f2FLRW_D (to_tconstpool tconstpool:$src))>, Requires<[HasFPUv3_DF]>;
+
+defm : LdPat<load, uimm8_2, f2FLD_S, f32>, Requires<[HasFPUv3_SF]>;
+defm : LdPat<load, uimm8_2, f2FLD_D, f64>, Requires<[HasFPUv3_DF]>;
+defm : LdrPat<load, f2FLDR_S, f32>, Requires<[HasFPUv3_SF]>;
+defm : LdrPat<load, f2FLDR_D, f64>, Requires<[HasFPUv3_DF]>;
+
+defm : StPat<store, f32, uimm8_2, f2FST_S>, Requires<[HasFPUv3_SF]>;
+defm : StPat<store, f64, uimm8_2, f2FST_D>, Requires<[HasFPUv3_DF]>;
+defm : StrPat<store, f32, f2FSTR_S>, Requires<[HasFPUv3_SF]>;
+defm : StrPat<store, f64, f2FSTR_D>, Requires<[HasFPUv3_DF]>;
+
+// fmfvr
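+// fmfvr moves the contents of an FP register into one or two GPRs; fmtvr
+// (below) moves GPR contents into an FP register.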
+let vry = 0 in
+def f2FMFVRL : F2_XYZ<0b00011, 0b011001, "fmfvr.32.1\t$vrz, $vrx",
+ (outs GPR:$vrz), (ins FPR32Op:$vrx),
+ [(set GPR:$vrz, (bitconvert FPR32Op:$vrx))]>;
+// TODO: vrz and vrz+1
+def f2FMFVRL_2 : F2_XYZ<0b00011, 0b111010, "fmfvr.32.2\t$vrz, $vry, $vrx",
+ (outs GPR:$vrz, GPR:$vry), (ins FPR64Op:$vrx),
+ []>;
+
+let Predicates = [HasFPUv3_DF] in {
+let vry = 0 in {
+let isCodeGenOnly = 1 in
+def f2FMFVRL_D : F2_XYZ<0b00011, 0b011001, "fmfvr.32.1\t$vrz, $vrx",
+ (outs GPR:$vrz), (ins FPR64Op:$vrx),
+ []>;
+def f2FMFVRH_D : F2_XYZ<0b00011, 0b011000, "fmfvrh\t$vrz, $vrx",
+ (outs GPR:$vrz), (ins FPR64Op:$vrx),
+ []>;
+}
+def f2FMFVR_D : F2_XYZ<0b00011, 0b111000, "fmfvr.64\t$vrz, $vry, $vrx",
+ (outs GPR:$vrz, GPR:$vry), (ins FPR64Op:$vrx),
+ [(set GPR:$vrz, GPR:$vry, (CSKY_BITCAST_TO_LOHI FPR64Op:$vrx))]>;
+}
+
+// fmtvr
+def f2FMTVRL : F2_XZ_P<0b00011, 0b011011, "fmtvr.32.1",
+ [(set FPR32Op:$vrz, (bitconvert GPR:$vrx))],
+ (outs FPR32Op:$vrz), (ins GPR:$vrx)>;
+// TODO: vrz and vrz+1
+def f2FMTVRL_2 : F2_XYZ<0b00011, 0b111110, "fmtvr.32.2\t$vrz, $vrx, $vry",
+ (outs FPR32Op:$vrz), (ins GPR:$vrx, GPR:$vry),
+ []>;
+
+let Predicates = [HasFPUv3_DF] in {
+let isCodeGenOnly = 1 in
+def f2FMTVRL_D : F2_XZ_P<0b00011, 0b011011, "fmtvr.32.1",
+ [],
+ (outs FPR64Op:$vrz), (ins GPR:$vrx)>;
+let Constraints = "$vrZ = $vrz" in
+def f2FMTVRH_D : F2_XZ_P<0b00011, 0b011010, "fmtvrh",
+ [],
+ (outs FPR64Op:$vrz), (ins FPR64Op:$vrZ, GPR:$vrx)>;
+def f2FMTVR_D : F2_XYZ<0b00011, 0b111100, "fmtvr.64\t$vrz, $vrx, $vry",
+ (outs FPR64Op:$vrz), (ins GPR:$vrx, GPR:$vry),
+ [(set FPR64Op:$vrz, (CSKY_BITCAST_FROM_LOHI GPR:$vrx, GPR:$vry))]>;
+}
+
+// fcmp
+
+defm f2FCMPHS: F2_CXY_T<0b001100, "fcmphs">;
+defm f2FCMPLT: F2_CXY_T<0b001101, "fcmplt">;
+defm f2FCMPNE: F2_CXY_T<0b001110, "fcmpne">;
+defm f2FCMPUO: F2_CXY_T<0b001111, "fcmpuo">;
+
+defm f2FCMPHSZ: F2_CX_T<0b001000, "fcmphsz">;
+defm f2FCMPHZ : F2_CX_T<0b101010, "fcmphz">;
+defm f2FCMPLSZ: F2_CX_T<0b101011, "fcmplsz">;
+defm f2FCMPLTZ: F2_CX_T<0b001001, "fcmpltz">;
+defm f2FCMPNEZ: F2_CX_T<0b001010, "fcmpnez">;
+defm f2FCMPUOZ: F2_CX_T<0b001011, "fcmpuoz">;
+
+defm f2FMULA : F2_XYZZ_T<0b010100, "fmula",
+ TriOpFrag<(fadd (fmul node:$LHS, node:$MHS), node:$RHS)>>;
+
+defm f2FMULS : F2_XYZZ_T<0b010110, "fmuls",
+ TriOpFrag<(fsub node:$RHS, (fmul node:$LHS, node:$MHS))>>;
+
+defm f2FFMULA : F2_XYZZ_T<0b110000, "ffmula",
+ TriOpFrag<(fma node:$LHS, node:$MHS, node:$RHS)>>;
+
+defm f2FFMULS : F2_XYZZ_T<0b110001, "ffmuls",
+ TriOpFrag<(fma (fneg node:$LHS), node:$MHS, node:$RHS)>>;
+
+defm f2FFNMULA : F2_XYZZ_T<0b110010, "ffnmula",
+ TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))>>;
+
+defm f2FFNMULS : F2_XYZZ_T<0b110011, "ffnmuls",
+ TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))>>;
+
+defm f2FNMULA : F2_XYZZ_T<0b010111, "fnmula",
+ TriOpFrag<(fneg (fadd (fmul node:$LHS, node:$MHS), node:$RHS))>>;
+
+defm f2FNMULS : F2_XYZZ_T<0b010101, "fnmuls",
+ TriOpFrag<(fneg (fsub node:$RHS, (fmul node:$LHS, node:$MHS)))>>;
+
+defm f2FNMUL : F2_XYZ_T<0b010001, "fnmul",
+ BinOpFrag<(fneg (fmul node:$LHS, node:$RHS))>>;
+
+// fcvt
+def f2FFTOS32_S : F2_XZ_P<0b01000, 0b011011, "fftoi.f32.s32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOU32_S : F2_XZ_P<0b01000, 0b011010, "fftoi.f32.u32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FS32TOF_S : F2_XZ_P<0b01001, 0b011011, "fitof.s32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FU32TOF_S : F2_XZ_P<0b01001, 0b011010, "fitof.u32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXU32_S : F2_XZ_P<0b01000, 0b001010, "fftox.f32.u32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXS32_S : F2_XZ_P<0b01000, 0b001011, "fftox.f32.s32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFU32_S : F2_XZ_P<0b01001, 0b001010, "fxtof.u32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFS32_S : F2_XZ_P<0b01001, 0b001011, "fxtof.s32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+let Predicates = [HasFPUv3_DF] in {
+def f2FFTOS32_D : F2_XZ_P<0b01000, 0b011101, "fftoi.f64.s32", [], (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+def f2FFTOU32_D : F2_XZ_P<0b01000, 0b011100, "fftoi.f64.u32", [], (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+def f2FS32TOF_D : F2_XZ_P<0b01001, 0b011101, "fitof.s32.f64", [], (outs FPR64Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FU32TOF_D : F2_XZ_P<0b01001, 0b011100, "fitof.u32.f64", [], (outs FPR64Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXU32_D : F2_XZ_P<0b01000, 0b001100, "fftox.f64.u32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXS32_D : F2_XZ_P<0b01000, 0b001101, "fftox.f64.s32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFU32_D : F2_XZ_P<0b01001, 0b001100, "fxtof.u32.f64", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFS32_D : F2_XZ_P<0b01001, 0b001101, "fxtof.s32.f64", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+}
+
+defm f2FF32TOSI32 : F2_XZ_RM<0b00011, 0b0000, "fftoi.f32.s32", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+defm f2FF32TOUI32 : F2_XZ_RM<0b00011, 0b0001, "fftoi.f32.u32", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+defm f2FF32TOFI32 : F2_XZ_RM<0b01000, 0b1001, "fftofi.f32", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+let Predicates = [HasFPUv3_DF] in {
+defm f2FF64TOSI32 : F2_XZ_RM<0b00011, 0b0010, "fftoi.f64.s32", (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+defm f2FF64TOUI32 : F2_XZ_RM<0b00011, 0b0011, "fftoi.f64.u32", (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+defm f2FF64TOFI32 : F2_XZ_RM<0b01000, 0b1010, "fftofi.f64", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+}
+
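+// As for FPUv2, fold the rounding node into the conversion by selecting the
+// matching _RN/_RPI/_RNI/_RZ variant; a bare fp_to_sint/fp_to_uint uses _RZ.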
+def : Pat<(i32 (fp_to_sint (fround FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RN $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (fround FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RN $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint (fceil FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (fceil FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint (ffloor FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (ffloor FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint (ftrunc FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (ftrunc FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint FPR32Op:$vrx)), (COPY_TO_REGCLASS (f2FF32TOSI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint FPR32Op:$vrx)), (COPY_TO_REGCLASS (f2FF32TOUI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+
+def : Pat<(i32 (fp_to_sint (fround FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RN $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (fround FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RN $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint (fceil FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (fceil FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint (ffloor FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (ffloor FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint (ftrunc FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (ftrunc FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint FPR64Op:$vrx)), (COPY_TO_REGCLASS (f2FF64TOSI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint FPR64Op:$vrx)), (COPY_TO_REGCLASS (f2FF64TOUI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+
+def : Pat<(sint_to_fp GPR:$vrx), (f2FS32TOF_S (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_SF]>;
+def : Pat<(uint_to_fp GPR:$vrx), (f2FU32TOF_S (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_SF]>;
+def : Pat<(sint_to_fp GPR:$vrx), (f2FS32TOF_D (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_DF]>;
+def : Pat<(uint_to_fp GPR:$vrx), (f2FU32TOF_D (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_DF]>;
+
+let Predicates = [HasFPUv3_DF] in {
+def f2FDTOS : F2_XZ_P<0b00011, 0b010110, "fdtos", [(set FPR32Op:$vrz, (fpround FPR64Op:$vrx))], (outs FPR32Op:$vrz),
+ (ins FPR64Op:$vrx)>;
+def f2FSTOD : F2_XZ_P<0b00011, 0b010111, "fstod", [(set FPR64Op:$vrz, (fpextend FPR32Op:$vrx))], (outs FPR64Op:$vrz),
+ (ins FPR32Op:$vrx)>;
+}
+
+// fsel
+defm f2FSEL: F2_CXYZ_T<0b111001, "fsel">;
+
+def f2FINS: F2_XZ_SET<0b00000, FPR32Op, 0b011011, "fins.32">;
+
+def : Pat<(f32 fpimm16:$imm), (COPY_TO_REGCLASS (MOVI32 (fpimm32_lo16 fpimm16:$imm)), FPR32)>,
+ Requires<[HasFPUv3_SF]>;
+def : Pat<(f32 fpimm16_16:$imm), (COPY_TO_REGCLASS (MOVIH32 (fpimm32_hi16 fpimm16_16:$imm)), FPR32)>,
+ Requires<[HasFPUv3_SF]>;
+def : Pat<(f32 fpimm:$imm), (COPY_TO_REGCLASS (ORI32 (MOVIH32 (fpimm32_hi16 fpimm:$imm)), (fpimm32_lo16 fpimm:$imm)), FPR32)>,
+ Requires<[HasFPUv3_SF]>;
+
+
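+// Besides the brcond/setcc patterns, these multiclasses also lower
+// (select (setcc ...), t, f) through f2FSEL; IsSelectSwap flips the true/false
+// operands when the compare computes the inverted condition.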
+multiclass BRCond_Bin_F2<CondCode CC, string Instr, Instruction Br, Instruction MV, bit IsSelectSwap = 0> {
+ let Predicates = [HasFPUv3_SF] in
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), bb:$imm16)>;
+ let Predicates = [HasFPUv3_DF] in
+ def : Pat<(brcond (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), bb:$imm16)>;
+
+ let Predicates = [HasFPUv3_SF] in
+ def : Pat<(i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2))>;
+ let Predicates = [HasFPUv3_DF] in
+ def : Pat<(i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2))>;
+
+ let Predicates = [HasFPUv3_SF] in {
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), FPR32Op:$rx, FPR32Op:$false),
+ !if(
+ !eq(IsSelectSwap, 0),
+ (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), FPR32Op:$false, FPR32Op:$rx)
+ )>;
+ }
+ let Predicates = [HasFPUv3_DF] in {
+ def : Pat<(select (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), FPR64Op:$rx, FPR64Op:$false),
+ !if(
+ !eq(IsSelectSwap, 0),
+ (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), FPR64Op:$rx, FPR64Op:$false),
+ (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), FPR64Op:$false, FPR64Op:$rx)
+ )>;
+ }
+}
+
+multiclass BRCond_Bin_SWAP_F2<CondCode CC, string Instr, Instruction Br, Instruction MV, bit IsSelectSwap = 0> {
+ let Predicates = [HasFPUv3_SF] in
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), bb:$imm16)>;
+ let Predicates = [HasFPUv3_DF] in
+ def : Pat<(brcond (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), bb:$imm16),
+ (Br (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), bb:$imm16)>;
+
+ let Predicates = [HasFPUv3_SF] in
+ def : Pat<(i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1))>;
+ let Predicates = [HasFPUv3_DF] in
+ def : Pat<(i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)),
+ (MV (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1))>;
+
+ let Predicates = [HasFPUv3_SF] in {
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), FPR32Op:$rx, FPR32Op:$false),
+ !if(
+ !eq(IsSelectSwap, 0),
+ (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)
+ )>;
+ }
+ let Predicates = [HasFPUv3_DF] in {
+ def : Pat<(select (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), FPR64Op:$rx, FPR64Op:$false),
+ !if(
+ !eq(IsSelectSwap, 0),
+ (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), FPR64Op:$rx, FPR64Op:$false),
+ (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), FPR64Op:$false, FPR64Op:$rx)
+ )>;
+ }
+}
+
+// Rewrite ordered conditions: invert (ordered && compare) into (unordered || inverse(compare)).
+
+defm : BRCond_Bin_F2<SETUNE, "f2FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETOEQ, "f2FCMPNE", BF32, MVCV32, 1>;
+defm : BRCond_Bin_F2<SETOGE, "f2FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETOLT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETUO, "f2FCMPUO", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETO, "f2FCMPUO", BF32, MVCV32, 1>;
+defm : BRCond_Bin_SWAP_F2<SETOGT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETOLE, "f2FCMPHS", BT32, MVC32>;
+
+defm : BRCond_Bin_F2<SETNE, "f2FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETEQ, "f2FCMPNE", BF32, MVCV32, 1>;
+defm : BRCond_Bin_F2<SETGE, "f2FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETLT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETGT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETLE, "f2FCMPHS", BT32, MVC32>;
+
+// ------
+
+let Predicates = [HasFPUv3_SF] in {
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGE)), bb:$imm16),
+ (BT32 (f2FCMPHSZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOGE)),
+ (MVC32 (f2FCMPHSZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGE)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPHSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLT)), bb:$imm16),
+ (BT32 (f2FCMPLTZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOLT)),
+ (MVC32 (f2FCMPLTZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLT)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPLTZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLE)), bb:$imm16),
+ (BT32 (f2FCMPLSZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOLE)),
+ (MVC32 (f2FCMPLSZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLE)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPLSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGT)), bb:$imm16),
+ (BT32 (f2FCMPHZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOGT)),
+ (MVC32 (f2FCMPHZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGT)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPHZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETUNE)), bb:$imm16),
+ (BT32 (f2FCMPNEZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETUNE)),
+ (MVC32 (f2FCMPNEZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETUNE)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPNEZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm, SETUO)), bb:$imm16),
+ (BT32 (f2FCMPUOZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm, SETUO)),
+ (MVC32 (f2FCMPUOZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm, SETUO)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPUOZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETGE)), bb:$imm16),
+ (BT32 (f2FCMPHSZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETGE)),
+ (MVC32 (f2FCMPHSZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETGE)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPHSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETLT)), bb:$imm16),
+ (BT32 (f2FCMPLTZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETLT)),
+ (MVC32 (f2FCMPLTZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETLT)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPLTZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETLE)), bb:$imm16),
+ (BT32 (f2FCMPLSZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETLE)),
+ (MVC32 (f2FCMPLSZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETLE)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPLSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETGT)), bb:$imm16),
+ (BT32 (f2FCMPHZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETGT)),
+ (MVC32 (f2FCMPHZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETGT)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPHZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+
+
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm, SETO)), bb:$imm16),
+ (BF32 (f2FCMPUOZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm, SETO)),
+ (MVCV32 (f2FCMPUOZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm, SETO)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPUOZ_S FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOEQ)), bb:$imm16),
+ (BF32 (f2FCMPNEZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOEQ)),
+ (MVCV32 (f2FCMPNEZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOEQ)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPNEZ_S FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)>;
+ def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETEQ)), bb:$imm16),
+ (BF32 (f2FCMPNEZ_S FPR32Op:$rs1), bb:$imm16)>;
+ def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETEQ)),
+ (MVCV32 (f2FCMPNEZ_S FPR32Op:$rs1))>;
+ def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETEQ)), FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S (f2FCMPNEZ_S FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)>;
+}
+
+
+let Predicates = [HasFPUv3_SF] in
+def : Pat<(select CARRY:$ca, FPR32Op:$rx, FPR32Op:$false),
+ (f2FSEL_S CARRY:$ca, FPR32Op:$rx, FPR32Op:$false)>;
+let Predicates = [HasFPUv3_DF] in
+def : Pat<(select CARRY:$ca, FPR64Op:$rx, FPR64Op:$false),
+ (f2FSEL_D CARRY:$ca, FPR64Op:$rx, FPR64Op:$false)>;
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
index ade5c7f795af..b7f4fc17166b 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
@@ -194,6 +194,8 @@ def FPR64 : RegisterClass<"CSKY", [f64], 64,
def sFPR64 : RegisterClass<"CSKY", [f64], 64,
(add (sequence "F%u_64", 0, 15))>;
+def sFPR64_V : RegisterClass<"CSKY", [v2f32], 32, (add sFPR64)>;
+
def FPR128 : RegisterClass<"CSKY",
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
(add (sequence "F%u_128", 0, 31))>;
diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
index 8f61feb6506d..94b24044c27d 100644
--- a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
+++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
@@ -23,6 +23,9 @@ using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYTarget() {
RegisterTargetMachine<CSKYTargetMachine> X(getTheCSKYTarget());
+
+ PassRegistry *Registry = PassRegistry::getPassRegistry();
+ initializeCSKYConstantIslandsPass(*Registry);
}
static std::string computeDataLayout(const Triple &TT) {
@@ -92,6 +95,7 @@ public:
}
bool addInstSelector() override;
+ void addPreEmitPass() override;
};
} // namespace
@@ -105,3 +109,7 @@ bool CSKYPassConfig::addInstSelector() {
return false;
}
+
+void CSKYPassConfig::addPreEmitPass() {
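+ // Place constant pool entries (constant islands) just before code emission.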
+ addPass(createCSKYConstantIslandPass());
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index 7001de999a51..07757f03c258 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -73,6 +73,13 @@ void CSKYInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
O << getRegisterName(RegNo);
}
+void CSKYInstPrinter::printFPRRegName(raw_ostream &O, unsigned RegNo) const {
+ if (PrintBranchImmAsAddress)
+ O << getRegisterName(RegNo, CSKY::NoRegAltName);
+ else
+ O << getRegisterName(RegNo);
+}
+
void CSKYInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O,
const char *Modifier) {
@@ -201,3 +208,11 @@ const char *CSKYInstPrinter::getRegisterName(unsigned RegNo) {
return getRegisterName(RegNo, ArchRegNames ? CSKY::NoRegAltName
: CSKY::ABIRegAltName);
}
+
+void CSKYInstPrinter::printFPR(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ assert(MO.isReg());
+
+ printFPRRegName(O, MO.getReg());
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
index f93a342ec6a3..52a1b9276762 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
@@ -36,6 +36,8 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O, const char *Modifier = nullptr);
+ void printFPRRegName(raw_ostream &O, unsigned RegNo) const;
+
// Autogenerated by tblgen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address,
@@ -60,6 +62,8 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSPAddr(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printFPR(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
static const char *getRegisterName(unsigned RegNo, unsigned AltIdx);
};
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp
index 668247bbbd87..543f2e3d43d4 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp
@@ -22,4 +22,6 @@ CSKYMCAsmInfo::CSKYMCAsmInfo(const Triple &TargetTriple) {
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
CommentString = "#";
+
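+ // Emit DWARF CFI so exceptions can be unwound.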
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index d131cf896834..15eba89eeb55 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -211,8 +211,7 @@ struct HexagonOperand : public MCParsedAsmOperand {
struct ImmTy Imm;
};
- HexagonOperand(KindTy K, MCContext &Context)
- : MCParsedAsmOperand(), Kind(K), Context(Context) {}
+ HexagonOperand(KindTy K, MCContext &Context) : Kind(K), Context(Context) {}
public:
HexagonOperand(const HexagonOperand &o)
diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp
index 685bafd785df..17adf32750db 100644
--- a/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -940,8 +940,8 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
// If evaluated successfully add the targets to the cumulative list.
if (Trace) {
dbgs() << " adding targets:";
- for (unsigned i = 0, n = BTs.size(); i < n; ++i)
- dbgs() << " " << printMBBReference(*BTs[i]);
+ for (const MachineBasicBlock *BT : BTs)
+ dbgs() << " " << printMBBReference(*BT);
if (FallsThrough)
dbgs() << "\n falls through\n";
else
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 428d25da6dbc..b2a842233bb8 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -3260,13 +3260,12 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
dbgs() << "Group[" << i << "] inp: "
<< printReg(G.Inp.Reg, HRI, G.Inp.Sub)
<< " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
- for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
- dbgs() << " " << *G.Ins[j];
+ for (const MachineInstr *MI : G.Ins)
+ dbgs() << " " << *MI;
}
});
- for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
- InstrGroup &G = Groups[i];
+ for (InstrGroup &G : Groups) {
if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
continue;
auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
diff --git a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 1938a5c259da..8e014b395286 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -493,6 +493,11 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
RegisterCell RC = eADD(rc(1), lo(M, W0));
return rr0(RC, Outputs);
}
+ case M2_mnaci: {
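+ // M2_mnaci (Rx -= mpyi(Rs,Rt)): subtract the low word of the product from the accumulator.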
+ RegisterCell M = eMLS(rc(2), rc(3));
+ RegisterCell RC = eSUB(rc(1), lo(M, W0));
+ return rr0(RC, Outputs);
+ }
case M2_mpysmi: {
RegisterCell M = eMLS(rc(1), eIMM(im(2), W0));
return rr0(lo(M, 32), Outputs);
diff --git a/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index b456cf139c55..a31ad45f4bb0 100644
--- a/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -118,13 +118,10 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
return false;
// Loop over all of the basic blocks.
- for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock *MBB = &*MBBb;
-
+ for (MachineBasicBlock &MBB : Fn) {
// Traverse the basic block.
- MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
- if (MII != MBB->end()) {
+ MachineBasicBlock::iterator MII = MBB.getFirstTerminator();
+ if (MII != MBB.end()) {
MachineInstr &MI = *MII;
int Opc = MI.getOpcode();
if (IsConditionalBranch(Opc)) {
@@ -155,17 +152,17 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// Remove BB2
// BB3: ...
// BB4: ...
- unsigned NumSuccs = MBB->succ_size();
- MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+ unsigned NumSuccs = MBB.succ_size();
+ MachineBasicBlock::succ_iterator SI = MBB.succ_begin();
MachineBasicBlock* FirstSucc = *SI;
MachineBasicBlock* SecondSucc = *(++SI);
MachineBasicBlock* LayoutSucc = nullptr;
MachineBasicBlock* JumpAroundTarget = nullptr;
- if (MBB->isLayoutSuccessor(FirstSucc)) {
+ if (MBB.isLayoutSuccessor(FirstSucc)) {
LayoutSucc = FirstSucc;
JumpAroundTarget = SecondSucc;
- } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+ } else if (MBB.isLayoutSuccessor(SecondSucc)) {
LayoutSucc = SecondSucc;
JumpAroundTarget = FirstSucc;
} else {
@@ -201,7 +198,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
if (case1 || case2) {
InvertAndChangeJumpTarget(MI, UncondTarget);
- MBB->replaceSuccessor(JumpAroundTarget, UncondTarget);
+ MBB.replaceSuccessor(JumpAroundTarget, UncondTarget);
// Remove the unconditional branch in LayoutSucc.
LayoutSucc->erase(LayoutSucc->begin());
diff --git a/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/llvm/lib/Target/Hexagon/HexagonCallingConv.td
index 93e17e608dd1..cc41b569e490 100644
--- a/llvm/lib/Target/Hexagon/HexagonCallingConv.td
+++ b/llvm/lib/Target/Hexagon/HexagonCallingConv.td
@@ -126,16 +126,16 @@ def CC_Hexagon_HVX: CallingConv<[
// HVX 128-byte mode
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>,
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
CCAssignToStack<128,128>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
CCAssignToStack<256,128>>>,
CCDelegateTo<CC_Hexagon>
@@ -152,10 +152,10 @@ def RetCC_Hexagon_HVX: CallingConv<[
// HVX 128-byte mode
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
CCAssignToReg<[V0]>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
CCAssignToReg<[W0]>>>,
CCDelegateTo<RetCC_Hexagon>
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index a53efeb96961..fc5e05d8c9a0 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -290,13 +290,11 @@ namespace {
raw_ostream &operator<< (raw_ostream &OS,
const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
- using const_iterator = NodeToUsesMap::const_iterator;
-
- for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
- const UseSet &Us = I->second;
- OS << I->first << " -> #" << Us.size() << '{';
- for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) {
- User *R = (*J)->getUser();
+ for (const auto &I : M) {
+ const UseSet &Us = I.second;
+ OS << I.first << " -> #" << Us.size() << '{';
+ for (const Use *U : Us) {
+ User *R = U->getUser();
if (R->hasName())
OS << ' ' << R->getName();
else
@@ -420,15 +418,12 @@ void HexagonCommonGEP::collect() {
// instruction that uses another GEP instruction as the base pointer, the
// gep node for the base pointer should already exist.
ValueToNodeMap NM;
- for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) {
- BasicBlock *B = cast<BasicBlock>(*I);
- for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) {
- if (!isa<GetElementPtrInst>(J))
- continue;
- GetElementPtrInst *GepI = cast<GetElementPtrInst>(J);
- if (isHandledGepForm(GepI))
- processGepInst(GepI, NM);
- }
+ for (Value *I : BO) {
+ BasicBlock *B = cast<BasicBlock>(I);
+ for (Instruction &J : *B)
+ if (auto *GepI = dyn_cast<GetElementPtrInst>(&J))
+ if (isHandledGepForm(GepI))
+ processGepInst(GepI, NM);
}
LLVM_DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes);
@@ -436,17 +431,14 @@ void HexagonCommonGEP::collect() {
static void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM,
NodeVect &Roots) {
- using const_iterator = NodeVect::const_iterator;
-
- for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
- GepNode *N = *I;
- if (N->Flags & GepNode::Root) {
- Roots.push_back(N);
- continue;
- }
- GepNode *PN = N->Parent;
- NCM[PN].push_back(N);
+ for (GepNode *N : Nodes) {
+ if (N->Flags & GepNode::Root) {
+ Roots.push_back(N);
+ continue;
}
+ GepNode *PN = N->Parent;
+ NCM[PN].push_back(N);
+ }
}
static void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM,
@@ -546,8 +538,7 @@ void HexagonCommonGEP::common() {
using NodeSetMap = std::map<unsigned, NodeSet>;
NodeSetMap MaybeEq;
- for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
- GepNode *N = *I;
+ for (GepNode *N : Nodes) {
unsigned H = node_hash(N);
MaybeEq[H].insert(N);
}
@@ -556,9 +547,8 @@ void HexagonCommonGEP::common() {
// one for equality and the other for non-equality.
NodeSymRel EqRel; // Equality relation (as set of equivalence classes).
NodePairSet Eq, Ne; // Caches.
- for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end();
- I != E; ++I) {
- NodeSet &S = I->second;
+ for (auto &I : MaybeEq) {
+ NodeSet &S = I.second;
for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) {
GepNode *N = *NI;
// If node already has a class, then the class must have been created
@@ -612,8 +602,7 @@ void HexagonCommonGEP::common() {
// Update the min element's flags, and user list.
uint32_t Flags = 0;
UseSet &MinUs = Uses[Min];
- for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) {
- GepNode *N = *J;
+ for (GepNode *N : S) {
uint32_t NF = N->Flags;
// If N is used, append all original values of N to the list of
// original values of Min.
@@ -633,8 +622,7 @@ void HexagonCommonGEP::common() {
// selected (minimum) node from the corresponding equivalence class.
// If a given parent does not have an equivalence class, leave it
// unchanged (it means that it's the only element in its class).
- for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
- GepNode *N = *I;
+ for (GepNode *N : Nodes) {
if (N->Flags & GepNode::Root)
continue;
const NodeSet *PC = node_class(N->Parent, EqRel);
@@ -652,8 +640,7 @@ void HexagonCommonGEP::common() {
// Finally, erase the nodes that are no longer used.
NodeSet Erase;
- for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
- GepNode *N = *I;
+ for (GepNode *N : Nodes) {
const NodeSet *PC = node_class(N, EqRel);
if (!PC)
continue;
@@ -663,7 +650,7 @@ void HexagonCommonGEP::common() {
if (N == F->second)
continue;
// Node for removal.
- Erase.insert(*I);
+ Erase.insert(N);
}
erase_if(Nodes, in_set(Erase));
@@ -775,8 +762,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
NodeToUsesMap::iterator UF = Uses.find(Node);
assert(UF != Uses.end() && "Used node with no use information");
UseSet &Us = UF->second;
- for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) {
- Use *U = *I;
+ for (Use *U : Us) {
User *R = U->getUser();
if (!isa<Instruction>(R))
continue;
@@ -790,8 +776,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
NodeChildrenMap::iterator CF = NCM.find(Node);
if (CF != NCM.end()) {
NodeVect &Cs = CF->second;
- for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) {
- GepNode *CN = *I;
+ for (GepNode *CN : Cs) {
NodeToValueMap::iterator LF = Loc.find(CN);
// If the child is only used in GEP instructions (i.e. is not used in
// non-GEP instructions), the nearest dominator computed for it may
@@ -831,8 +816,8 @@ BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node,
NodeChildrenMap::iterator CF = NCM.find(Node);
if (CF != NCM.end()) {
NodeVect &Cs = CF->second;
- for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I)
- recalculatePlacementRec(*I, NCM, Loc);
+ for (GepNode *C : Cs)
+ recalculatePlacementRec(C, NCM, Loc);
}
BasicBlock *LB = recalculatePlacement(Node, NCM, Loc);
LLVM_DEBUG(dbgs() << "LocRec end for node:" << Node << '\n');
@@ -921,8 +906,8 @@ BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node,
NodeChildrenMap::iterator CF = NCM.find(Node);
if (CF != NCM.end()) {
NodeVect &Cs = CF->second;
- for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I)
- adjustForInvariance(*I, NCM, Loc);
+ for (GepNode *C : Cs)
+ adjustForInvariance(C, NCM, Loc);
}
return LocB;
}
@@ -938,10 +923,9 @@ namespace {
raw_ostream &operator<< (raw_ostream &OS,
const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ;
raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) {
- for (NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end();
- I != E; ++I) {
- OS << I->first << " -> ";
- if (BasicBlock *B = cast_or_null<BasicBlock>(I->second))
+ for (const auto &I : Loc.Map) {
+ OS << I.first << " -> ";
+ if (BasicBlock *B = cast_or_null<BasicBlock>(I.second))
OS << B->getName() << '(' << B << ')';
else
OS << "<null-block>";
@@ -1016,8 +1000,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node,
// Collect all used nodes together with the uses from loads and stores,
// where the GEP node could be folded into the load/store instruction.
NodeToUsesMap FNs; // Foldable nodes.
- for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) {
- GepNode *N = *I;
+ for (GepNode *N : Ns) {
if (!(N->Flags & GepNode::Used))
continue;
NodeToUsesMap::iterator UF = Uses.find(N);
@@ -1025,8 +1008,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node,
UseSet &Us = UF->second;
// Loads/stores that use the node N.
UseSet LSs;
- for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) {
- Use *U = *J;
+ for (Use *U : Us) {
User *R = U->getUser();
// We're interested in uses that provide the address. It can happen
// that the value may also be provided via GEP, but we won't handle
@@ -1051,11 +1033,11 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node,
LLVM_DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs);
- for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) {
- GepNode *N = I->first;
- UseSet &Us = I->second;
- for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J)
- separateChainForNode(N, *J, Loc);
+ for (auto &FN : FNs) {
+ GepNode *N = FN.first;
+ UseSet &Us = FN.second;
+ for (Use *U : Us)
+ separateChainForNode(N, U, Loc);
}
}
@@ -1068,21 +1050,21 @@ void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) {
// Compute the initial placement determined by the users' locations, and
// the locations of the child nodes.
- for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
- recalculatePlacementRec(*I, NCM, Loc);
+ for (GepNode *Root : Roots)
+ recalculatePlacementRec(Root, NCM, Loc);
LLVM_DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc));
if (OptEnableInv) {
- for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
- adjustForInvariance(*I, NCM, Loc);
+ for (GepNode *Root : Roots)
+ adjustForInvariance(Root, NCM, Loc);
LLVM_DEBUG(dbgs() << "Node placement after adjustment for invariance:\n"
<< LocationAsBlock(Loc));
}
if (OptEnableConst) {
- for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
- separateConstantChains(*I, NCM, Loc);
+ for (GepNode *Root : Roots)
+ separateConstantChains(Root, NCM, Loc);
}
LLVM_DEBUG(dbgs() << "Node use information:\n" << Uses);
@@ -1153,8 +1135,8 @@ void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values,
NodeToUsesMap::iterator UF = Uses.find(N);
assert(UF != Uses.end() && "No use information for used node");
UseSet &Us = UF->second;
- for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I)
- Values.push_back((*I)->getUser());
+ for (const auto &U : Us)
+ Values.push_back(U->getUser());
}
NodeChildrenMap::iterator CF = NCM.find(N);
if (CF != NCM.end()) {
@@ -1223,8 +1205,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
// to the Roots list.
if (LastCN > 0) {
NodeVect &Cs = NCM[Last];
- for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) {
- GepNode *CN = *I;
+ for (GepNode *CN : Cs) {
CN->Flags &= ~GepNode::Internal;
CN->Flags |= GepNode::Root;
CN->BaseVal = NewInst;
@@ -1238,10 +1219,8 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
NodeToUsesMap::iterator UF = Uses.find(Last);
assert(UF != Uses.end() && "No use information found");
UseSet &Us = UF->second;
- for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) {
- Use *U = *I;
+ for (Use *U : Us)
U->set(NewInst);
- }
}
}
}
@@ -1261,8 +1240,8 @@ void HexagonCommonGEP::removeDeadCode() {
ValueVect Ins;
for (Instruction &I : llvm::reverse(*B))
Ins.push_back(&I);
- for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) {
- Instruction *In = cast<Instruction>(*I);
+ for (Value *I : Ins) {
+ Instruction *In = cast<Instruction>(I);
if (isInstructionTriviallyDead(In))
In->eraseFromParent();
}
diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index d3fcdb6ae9a8..d8af35cbf3a8 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -229,7 +229,7 @@ namespace {
private:
struct Register {
Register() = default;
- Register(unsigned R, unsigned S) : Reg(R), Sub(S) {}
+ Register(llvm::Register R, unsigned S) : Reg(R), Sub(S) {}
Register(const MachineOperand &Op)
: Reg(Op.getReg()), Sub(Op.getSubReg()) {}
Register &operator=(const MachineOperand &Op) {
@@ -1573,7 +1573,7 @@ HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
// No compounds are available. It is not clear whether we should
// even process such extenders where the initializer cannot be
// a single instruction, but do it for now.
- unsigned TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ llvm::Register TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
BuildMI(MBB, At, dl, HII->get(Hexagon::S2_asl_i_r), TmpR)
.add(MachineOperand(Ex.Rs))
.addImm(Ex.S);
diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index daf311fc49d4..105bf2811a20 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -125,8 +125,8 @@ namespace {
};
LatticeCell() : Kind(Top), Size(0), IsSpecial(false) {
- for (unsigned i = 0; i < MaxCellSize; ++i)
- Values[i] = nullptr;
+ for (const Constant *&Value : Values)
+ Value = nullptr;
}
bool meet(const LatticeCell &L);
@@ -1029,8 +1029,8 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) {
ToRemove.push_back(const_cast<MachineBasicBlock*>(SB));
Targets.remove(SB);
}
- for (unsigned i = 0, n = ToRemove.size(); i < n; ++i)
- removeCFGEdge(B, ToRemove[i]);
+ for (MachineBasicBlock *MBB : ToRemove)
+ removeCFGEdge(B, MBB);
// If there are any blocks left in the computed targets, it means that
// we think that the block could go somewhere, but the CFG does not.
// This could legitimately happen in blocks that have non-returning
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 03b0f75b2dc1..2ee7f1325df9 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -70,9 +70,7 @@ class HexagonCopyToCombine : public MachineFunctionPass {
public:
static char ID;
- HexagonCopyToCombine() : MachineFunctionPass(ID) {
- initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry());
- }
+ HexagonCopyToCombine() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 9a3feb5b6af1..2207925ceeba 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -612,8 +612,8 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
// Simply keep a list of children of B, and traverse that list.
using DTNodeVectType = SmallVector<MachineDomTreeNode *, 4>;
DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
- for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
- MachineBasicBlock *SB = (*I)->getBlock();
+ for (auto &I : Cn) {
+ MachineBasicBlock *SB = I->getBlock();
if (!Deleted.count(SB))
Changed |= visitBlock(SB, L);
}
@@ -648,8 +648,8 @@ bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
<< "\n");
bool Changed = false;
if (L) {
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= visitLoop(*I);
+ for (MachineLoop *I : *L)
+ Changed |= visitLoop(I);
}
MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN);
@@ -964,8 +964,8 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
using DTNodeVectType = SmallVector<MachineDomTreeNode *, 4>;
DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
- for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
- MachineBasicBlock *SB = (*I)->getBlock();
+ for (auto &I : Cn) {
+ MachineBasicBlock *SB = I->getBlock();
MDT->changeImmediateDominator(SB, IDB);
}
}
@@ -973,8 +973,8 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
while (!B->succ_empty())
B->removeSuccessor(B->succ_begin());
- for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
- (*I)->removeSuccessor(B, true);
+ for (MachineBasicBlock *Pred : B->predecessors())
+ Pred->removeSuccessor(B, true);
Deleted.insert(B);
MDT->eraseNode(B);
@@ -1064,8 +1064,8 @@ bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
Deleted.clear();
bool Changed = false;
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
- Changed |= visitLoop(*I);
+ for (MachineLoop *L : *MLI)
+ Changed |= visitLoop(L);
Changed |= visitLoop(nullptr);
return Changed;
diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index c444cf557c21..2693940bb1e9 100644
--- a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -1106,8 +1106,7 @@ bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
}
bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
- for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
- LiveRange::Segment &LR = *I;
+ for (LiveRange::Segment &LR : LI) {
// Range must start at a register...
if (!LR.start.isRegister())
return false;
@@ -1160,16 +1159,16 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
// Move all live segments from L2 to L1.
using ValueInfoMap = DenseMap<VNInfo *, VNInfo *>;
ValueInfoMap VM;
- for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) {
- VNInfo *NewVN, *OldVN = I->valno;
+ for (LiveRange::Segment &I : L2) {
+ VNInfo *NewVN, *OldVN = I.valno;
ValueInfoMap::iterator F = VM.find(OldVN);
if (F == VM.end()) {
- NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
+ NewVN = L1.getNextValue(I.valno->def, LIS->getVNInfoAllocator());
VM.insert(std::make_pair(OldVN, NewVN));
} else {
NewVN = F->second;
}
- L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
+ L1.addSegment(LiveRange::Segment(I.start, I.end, NewVN));
}
while (!L2.empty())
L2.removeSegment(*L2.begin());
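The coalesceRegisters hunk above relies on a lazy old-to-new value-number remapping: each OldVN seen in L2 gets exactly one NewVN in L1, created on first encounter and reused afterwards. A minimal standalone sketch of that pattern (plain C++, not the LLVM LiveInterval API; names are illustrative):

```cpp
#include <unordered_map>

// Lazy one-to-one remapping: create the replacement for a key the first time
// it is seen, then hand back the same replacement on every later lookup.
template <typename T, typename MakeNew>
T *remapValue(std::unordered_map<T *, T *> &Map, T *Old, MakeNew MakeNewFn) {
  auto It = Map.find(Old);
  if (It != Map.end())
    return It->second;          // already remapped: reuse the mapping
  T *New = MakeNewFn(Old);      // first encounter: create the replacement
  Map.emplace(Old, New);
  return New;
}
```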
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 12ceac545e9d..989a98571434 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -416,8 +416,8 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
UnsignedMap RPO;
RPOTType RPOT(&MF);
unsigned RPON = 0;
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
- RPO[(*I)->getNumber()] = RPON++;
+ for (auto &I : RPOT)
+ RPO[I->getNumber()] = RPON++;
// Don't process functions that have loops, at least for now. Placement
// of prolog and epilog must take loop structure into account. For simpli-
@@ -1410,7 +1410,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
}
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
// Add live in registers. We treat eh_return callee saved register r0 - r3
// specially. They are not really callee saved registers as they are not
// supposed to be killed.
@@ -1479,7 +1479,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
}
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
int FI = I.getFrameIdx();
HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
@@ -1620,7 +1620,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
// sub-registers to SRegs.
LLVM_DEBUG(dbgs() << "Initial CS registers: {");
for (const CalleeSavedInfo &I : CSI) {
- unsigned R = I.getReg();
+ Register R = I.getReg();
LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
SRegs[*SR] = true;
@@ -2635,7 +2635,7 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
// a contiguous block starting from D8.
BitVector Regs(Hexagon::NUM_TARGET_REGS);
for (const CalleeSavedInfo &I : CSI) {
- unsigned R = I.getReg();
+ Register R = I.getReg();
if (!Hexagon::DoubleRegsRegClass.contains(R))
return true;
Regs[R] = true;
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 85230cac9d7c..0bb1658e7698 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -583,14 +583,12 @@ namespace {
char HexagonGenInsert::ID = 0;
void HexagonGenInsert::dump_map() const {
- using iterator = IFMapType::const_iterator;
-
- for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
- dbgs() << " " << printReg(I->first, HRI) << ":\n";
- const IFListType &LL = I->second;
- for (unsigned i = 0, n = LL.size(); i < n; ++i)
- dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", "
- << PrintRegSet(LL[i].second, HRI) << '\n';
+ for (const auto &I : IFMap) {
+ dbgs() << " " << printReg(I.first, HRI) << ":\n";
+ const IFListType &LL = I.second;
+ for (const auto &J : LL)
+ dbgs() << " " << PrintIFR(J.first, HRI) << ", "
+ << PrintRegSet(J.second, HRI) << '\n';
}
}
@@ -627,8 +625,8 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
using SortableVectorType = std::vector<unsigned>;
SortableVectorType VRs;
- for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
- VRs.push_back(I->first);
+ for (auto &I : RB)
+ VRs.push_back(I.first);
llvm::sort(VRs, LexCmp);
// Transfer the results to the outgoing register ordering.
for (unsigned i = 0, n = VRs.size(); i < n; ++i)
@@ -853,20 +851,18 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
if (isDebug()) {
dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
- for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) {
- dbgs() << " L=" << I->first << ':';
- const RSListType &LL = I->second;
- for (unsigned i = 0, n = LL.size(); i < n; ++i)
- dbgs() << " (" << printReg(LL[i].first, HRI) << ",@"
- << LL[i].second << ')';
+ for (const auto &I : LM) {
+ dbgs() << " L=" << I.first << ':';
+ const RSListType &LL = I.second;
+ for (const auto &J : LL)
+ dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';
dbgs() << '\n';
}
}
bool Recorded = false;
- for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) {
- unsigned SrcR = *I;
+ for (unsigned SrcR : AVs) {
int FDi = -1, LDi = -1; // First/last different bit.
const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
uint16_t AW = AC.width();
@@ -888,8 +884,8 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
if (F == LM.end())
continue;
RSListType &LL = F->second;
- for (unsigned i = 0, n = LL.size(); i < n; ++i) {
- uint16_t S = LL[i].second;
+ for (const auto &I : LL) {
+ uint16_t S = I.second;
// MinL is the minimum length of the prefix. Any length above MinL
// allows some flexibility as to where the prefix can start:
// given the extra length EL=L-MinL, the prefix must start between
@@ -900,7 +896,7 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
uint16_t LowS = (EL < FD) ? FD-EL : 0;
if (S < LowS) // Starts too early.
continue;
- unsigned InsR = LL[i].first;
+ unsigned InsR = I.first;
if (!isValidInsertForm(VR, SrcR, InsR, L, S))
continue;
if (isDebug()) {
@@ -1029,10 +1025,10 @@ void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
}
void HexagonGenInsert::computeRemovableRegisters() {
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
- IFListType &LL = I->second;
- for (unsigned i = 0, n = LL.size(); i < n; ++i)
- findRemovableRegisters(I->first, LL[i].first, LL[i].second);
+ for (auto &I : IFMap) {
+ IFListType &LL = I.second;
+ for (auto &J : LL)
+ findRemovableRegisters(I.first, J.first, J.second);
}
}
@@ -1064,8 +1060,8 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
MachineInstr *DefVR = MRI->getVRegDef(VR);
bool DefEx = HII->isConstExtended(*DefVR);
bool HasNE = false;
- for (unsigned i = 0, n = LL.size(); i < n; ++i) {
- if (LL[i].second.empty())
+ for (const auto &I : LL) {
+ if (I.second.empty())
continue;
HasNE = true;
break;
@@ -1172,8 +1168,8 @@ void HexagonGenInsert::pruneCandidates() {
// selection method.
// First, remove candidates whose potentially removable set is a subset
// of another candidate's set.
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
- pruneCoveredSets(I->first);
+ for (const auto &I : IFMap)
+ pruneCoveredSets(I.first);
UnsignedMap RPO;
@@ -1181,18 +1177,18 @@ void HexagonGenInsert::pruneCandidates() {
RPOTType RPOT(MFN);
unsigned RPON = 0;
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
- RPO[(*I)->getNumber()] = RPON++;
+ for (const auto &I : RPOT)
+ RPO[I->getNumber()] = RPON++;
PairMapType Memo; // Memoization map for distance calculation.
// Remove candidates that would use registers defined too far away.
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
- pruneUsesTooFar(I->first, RPO, Memo);
+ for (const auto &I : IFMap)
+ pruneUsesTooFar(I.first, RPO, Memo);
pruneEmptyLists();
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
- pruneRegCopies(I->first);
+ for (const auto &I : IFMap)
+ pruneRegCopies(I.first);
}
namespace {
@@ -1277,8 +1273,8 @@ void HexagonGenInsert::selectCandidates() {
for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
const IFListType &LL = I->second;
RegisterSet TT;
- for (unsigned i = 0, n = LL.size(); i < n; ++i)
- TT.insert(LL[i].second);
+ for (const auto &J : LL)
+ TT.insert(J.second);
for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
RemC[R]++;
AllRMs.insert(TT);
@@ -1384,8 +1380,8 @@ bool HexagonGenInsert::generateInserts() {
// Create a new register for each one from IFMap, and store them in the
// map.
UnsignedMap RegMap;
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
- unsigned VR = I->first;
+ for (auto &I : IFMap) {
+ unsigned VR = I.first;
const TargetRegisterClass *RC = MRI->getRegClass(VR);
Register NewVR = MRI->createVirtualRegister(RC);
RegMap[VR] = NewVR;
@@ -1394,15 +1390,15 @@ bool HexagonGenInsert::generateInserts() {
// We can generate the "insert" instructions using potentially stale re-
// gisters: SrcR and InsR for a given VR may be among other registers that
// are also replaced. This is fine, we will do the mass "rauw" a bit later.
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
- MachineInstr *MI = MRI->getVRegDef(I->first);
+ for (auto &I : IFMap) {
+ MachineInstr *MI = MRI->getVRegDef(I.first);
MachineBasicBlock &B = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- unsigned NewR = RegMap[I->first];
+ unsigned NewR = RegMap[I.first];
bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
: HII->get(Hexagon::S2_insertp);
- IFRecord IF = I->second[0].first;
+ IFRecord IF = I.second[0].first;
unsigned Wdh = IF.Wdh, Off = IF.Off;
unsigned InsS = 0;
if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
@@ -1428,9 +1424,9 @@ bool HexagonGenInsert::generateInserts() {
MRI->clearKillFlags(IF.InsR);
}
- for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
- MachineInstr *DefI = MRI->getVRegDef(I->first);
- MRI->replaceRegWith(I->first, RegMap[I->first]);
+ for (const auto &I : IFMap) {
+ MachineInstr *DefI = MRI->getVRegDef(I.first);
+ MRI->replaceRegWith(I.first, RegMap[I.first]);
DefI->eraseFromParent();
}
@@ -1523,9 +1519,8 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
if (isDebug()) {
dbgs() << "Cell ordering:\n";
- for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end();
- I != E; ++I) {
- unsigned VR = I->first, Pos = I->second;
+ for (const auto &I : CellOrd) {
+ unsigned VR = I.first, Pos = I.second;
dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 1a66394e9757..00615f355146 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -505,8 +505,8 @@ bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
collectPredicateGPR(MF);
- for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I)
- processPredicateGPR(*I);
+ for (const RegisterSubReg &R : PredGPRs)
+ processPredicateGPR(R);
bool Again;
do {
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 5d2e1b259449..43afae441457 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1127,8 +1127,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
bool L1Used = false;
// Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
- Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
+ for (MachineLoop *I : *L) {
+ Changed |= convertToHardwareLoop(I, RecL0used, RecL1used);
L0Used |= RecL0used;
L1Used |= RecL1used;
}
@@ -1587,16 +1587,6 @@ void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
MO.setReg(NewR);
}
-static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
- // These two instructions are not extendable.
- if (CmpOpc == Hexagon::A4_cmpbeqi)
- return isUInt<8>(Imm);
- if (CmpOpc == Hexagon::A4_cmpbgti)
- return isInt<8>(Imm);
- // The rest of the comparison-with-immediate instructions are extendable.
- return true;
-}
-
bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
MachineBasicBlock *Header = L->getHeader();
MachineBasicBlock *Latch = L->getLoopLatch();
@@ -1812,9 +1802,9 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
// Most comparisons of register against an immediate value allow
// the immediate to be constant-extended. There are some exceptions
// though. Make sure the new combination will work.
- if (CmpImmOp->isImm())
- if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
- return false;
+ if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) &&
+ !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false))
+ return false;
// Make sure that the compare happens after the bump. Otherwise,
// after the fixup, the compare would use a yet-undefined register.
diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
index 44679d429de5..e2215c9900d0 100644
--- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
@@ -44,12 +44,7 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) {
if (!Resources->canReserveResources(*MI)) {
LLVM_DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI);
HazardType RetVal = Hazard;
- if (TII->mayBeNewStore(*MI)) {
- // Make sure the register to be stored is defined by an instruction in the
- // packet.
- MachineOperand &MO = MI->getOperand(MI->getNumOperands() - 1);
- if (!MO.isReg() || RegDefs.count(MO.getReg()) == 0)
- return Hazard;
+ if (isNewStore(*MI)) {
// The .new store version uses different resources so check if it
// causes a hazard.
MachineFunction *MF = MI->getParent()->getParent();
@@ -105,6 +100,15 @@ bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum));
}
+/// Return true if the instruction would be converted to a new value store when
+/// packetized.
+bool HexagonHazardRecognizer::isNewStore(MachineInstr &MI) {
+ if (!TII->mayBeNewStore(MI))
+ return false;
+ MachineOperand &MO = MI.getOperand(MI.getNumOperands() - 1);
+ return (MO.isReg() && RegDefs.count(MO.getReg()) != 0);
+}
+
void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
if (!MI)
@@ -119,7 +123,7 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
if (TII->isZeroCost(MI->getOpcode()))
return;
- if (!Resources->canReserveResources(*MI)) {
+ if (!Resources->canReserveResources(*MI) || isNewStore(*MI)) {
// It must be a .new store since other instructions must be able to be
// reserved at this point.
assert(TII->mayBeNewStore(*MI) && "Expecting .new store");
@@ -127,11 +131,12 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *NewMI =
MF->CreateMachineInstr(TII->get(TII->getDotNewOp(*MI)),
MI->getDebugLoc());
- assert(Resources->canReserveResources(*NewMI));
- Resources->reserveResources(*NewMI);
+ if (Resources->canReserveResources(*NewMI))
+ Resources->reserveResources(*NewMI);
+ else
+ Resources->reserveResources(*MI);
MF->deleteMachineInstr(NewMI);
- }
- else
+ } else
Resources->reserveResources(*MI);
LLVM_DEBUG(dbgs() << " Add instruction " << *MI);
diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
index 53b9cb43b4b6..0528cbd1f15f 100644
--- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
+++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
@@ -40,6 +40,10 @@ class HexagonHazardRecognizer : public ScheduleHazardRecognizer {
// The set of registers defined by instructions in the current packet.
SmallSet<unsigned, 8> RegDefs;
+ // Return true if the instruction is a store that is converted to a new value
+ // store because its value is defined in the same packet.
+ bool isNewStore(MachineInstr &MI);
+
public:
HexagonHazardRecognizer(const InstrItineraryData *II,
const HexagonInstrInfo *HII,
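Taken together with the HexagonHazardRecognizer.cpp hunk, the new isNewStore hook asks one question: is the register being stored defined by an instruction already placed in the current packet? A toy, self-contained sketch of just that membership check (it omits the mayBeNewStore precondition; the names are illustrative, not the real MI interface):

```cpp
#include <set>

// Toy model of the ".new store" test: a store may use its .new form only when
// the register it stores is defined by another instruction in the same packet.
struct ToyStore {
  unsigned StoredReg; // register whose value the store writes to memory
};

bool mayUseDotNewForm(const ToyStore &St, const std::set<unsigned> &PacketDefs) {
  return PacketDefs.count(St.StoredReg) != 0;
}
```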
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 2679e399852f..161768b8dc22 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1176,6 +1176,9 @@ void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
EVT UVT = U->getValueType(0);
if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
continue;
+ // Do not generate a select for any i1 vector type.
+ if (UVT.isVector() && UVT.getVectorElementType() == MVT::i1)
+ continue;
if (isMemOPCandidate(N, U))
continue;
@@ -1282,7 +1285,7 @@ void HexagonDAGToDAGISel::emitFunctionEntryCode() {
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineBasicBlock *EntryBB = &MF->front();
- unsigned AR = FuncInfo->CreateReg(MVT::i32);
+ Register AR = FuncInfo->CreateReg(MVT::i32);
Align EntryMaxA = MFI.getMaxAlign();
BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
.addImm(EntryMaxA.value());
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index ed4874baf7c8..0a6dd727eb82 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -230,8 +230,7 @@ bool Coloring::color() {
WorkQ.push_back(N);
}
- for (unsigned I = 0; I < WorkQ.size(); ++I) {
- Node N = WorkQ[I];
+ for (Node N : WorkQ) {
NodeSet &Ns = Edges[N];
auto P = getUniqueColor(Ns);
if (P.first) {
@@ -270,8 +269,7 @@ bool Coloring::color() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Coloring::dump() const {
dbgs() << "{ Order: {";
- for (unsigned I = 0; I != Order.size(); ++I) {
- Node P = Order[I];
+ for (Node P : Order) {
if (P != Ignore)
dbgs() << ' ' << P;
else
@@ -761,8 +759,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
namespace {
struct ShuffleMask {
ShuffleMask(ArrayRef<int> M) : Mask(M) {
- for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
- int M = Mask[I];
+ for (int M : Mask) {
if (M == -1)
continue;
MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
@@ -935,8 +932,7 @@ static SmallVector<unsigned, 4> getInputSegmentList(ShuffleMask SM,
unsigned Shift = Log2_32(SegLen);
BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
- for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
- int M = SM.Mask[I];
+ for (int M : SM.Mask) {
if (M >= 0)
Segs.set(M >> Shift);
}
@@ -2397,6 +2393,7 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
SDValue Base = N->getOperand(4);
SDValue Modifier = N->getOperand(5);
SDValue Offset = N->getOperand(6);
+ SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
unsigned Opcode;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -2418,7 +2415,8 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
}
SDVTList VTs = CurDAG->getVTList(MVT::Other);
- SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain };
+ SDValue Ops[] = { Address, ImmOperand,
+ Predicate, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2434,6 +2432,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
SDValue Base = N->getOperand(3);
SDValue Modifier = N->getOperand(4);
SDValue Offset = N->getOperand(5);
+ SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
unsigned Opcode;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -2455,7 +2454,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
}
SDVTList VTs = CurDAG->getVTList(MVT::Other);
- SDValue Ops[] = { Address, Base, Modifier, Offset, Chain };
+ SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 88effed9f076..d7ca934a23e6 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -543,9 +543,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The Glue is necessary since all emitted instructions must be
// stuck together.
if (!CLI.IsTailCall) {
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, Glue);
+ for (const auto &R : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
Glue = Chain.getValue(1);
}
} else {
@@ -560,9 +559,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
//
// Do not flag preceding copytoreg stuff together with the following stuff.
Glue = SDValue();
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, Glue);
+ for (const auto &R : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
Glue = Chain.getValue(1);
}
Glue = SDValue();
@@ -589,10 +587,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add argument registers to the end of the list so that they are
// known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
- }
+ for (const auto &R : RegsToPass)
+ Ops.push_back(DAG.getRegister(R.first, R.second.getValueType()));
const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
@@ -690,7 +686,7 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
case InlineAsm::Kind_RegDef:
case InlineAsm::Kind_RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
- unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+ Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
if (Reg != LR)
continue;
HMFI.setHasClobberLR(true);
@@ -1190,7 +1186,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
+ Register Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
@@ -1776,6 +1772,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ // Special handling for half-precision floating point conversions.
+ // Lower half float conversions into library calls.
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+
// Handling of indexed loads/stores: default is "expand".
//
for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
@@ -1856,6 +1864,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
else
setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
+ // Routines to handle the fp16 storage type.
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+ setLibcallName(RTLIB::FPROUND_F64_F16, "__truncdfhf2");
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+
// These cause problems when the shift amount is non-constant.
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
@@ -2204,8 +2217,7 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
// Express the shuffle mask in terms of bytes.
SmallVector<int,8> ByteMask;
unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- int M = Mask[i];
+ for (int M : Mask) {
if (M < 0) {
for (unsigned j = 0; j != ElemBytes; ++j)
ByteMask.push_back(-1);
@@ -2428,8 +2440,8 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
if (AllConst) {
int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
(Consts[1]->getZExtValue() & 0xFF) << 8 |
- (Consts[1]->getZExtValue() & 0xFF) << 16 |
- Consts[2]->getZExtValue() << 24;
+ (Consts[2]->getZExtValue() & 0xFF) << 16 |
+ Consts[3]->getZExtValue() << 24;
return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
}
@@ -2720,7 +2732,6 @@ SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
const {
if (Ty.isVector()) {
- assert(Ty.isInteger() && "Only integer vectors are supported here");
unsigned W = Ty.getSizeInBits();
if (W <= 64)
return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
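The fp16 hunks above treat half precision purely as a storage type: the conversions are expanded to the libcalls registered via setLibcallName (__extendhfsf2, __truncsfhf2, __truncdfhf2). As a rough illustration of the work an __extendhfsf2-style helper performs (this is an assumption about the runtime routine, not code from this patch), here is a standalone binary16-to-binary32 widening:

```cpp
#include <cstdint>
#include <cstring>

// Widen an IEEE-754 binary16 bit pattern to a float. Every binary16 value is
// exactly representable in binary32, so no rounding is involved.
float halfBitsToFloat(uint16_t H) {
  uint32_t Sign = uint32_t(H & 0x8000) << 16;
  uint32_t Exp  = (H >> 10) & 0x1F;
  uint32_t Mant = H & 0x3FF;
  uint32_t Bits;
  if (Exp == 0x1F) {
    Bits = Sign | 0x7F800000 | (Mant << 13);          // Inf or NaN
  } else if (Exp != 0) {
    Bits = Sign | ((Exp + 112) << 23) | (Mant << 13); // normal: rebias 15 -> 127
  } else if (Mant == 0) {
    Bits = Sign;                                      // +/- zero
  } else {
    uint32_t E = 113;                                 // subnormal: renormalize
    while ((Mant & 0x400) == 0) { Mant <<= 1; --E; }
    Bits = Sign | (E << 23) | ((Mant & 0x3FF) << 13);
  }
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}
```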
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index d518c036f125..f9ce7a9407aa 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -458,6 +458,7 @@ private:
SelectionDAG &DAG) const;
SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
@@ -468,7 +469,6 @@ private:
SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
@@ -476,6 +476,8 @@ private:
SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG) const;
SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index f7237f496aee..0ba75a544c04 100644..100755
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -55,6 +55,12 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
+ if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
+ addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
+ addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
+ }
}
// Set up operation actions.
@@ -83,6 +89,72 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
+ Subtarget.useHVXFloatingPoint()) {
+ setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
+ setOperationAction(ISD::FADD, MVT::v64f16, Legal);
+ setOperationAction(ISD::FSUB, MVT::v64f16, Legal);
+ setOperationAction(ISD::FMUL, MVT::v64f16, Legal);
+ setOperationAction(ISD::FADD, MVT::v32f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v32f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v32f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
+
+ // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom);
+
+ // BUILD_VECTOR with f16 operands cannot be promoted without
+ // promoting the result, so lower the node to vsplat or constant pool
+ setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal);
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);
+ // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
+ // generated.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
+
+ // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
+ // independent) handling of it would convert it to a load, which is
+ // not always the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom);
+ // Make concat-vectors custom to handle concats of more than 2 vectors.
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v64f32, Custom);
+ setOperationAction(ISD::FADD, MVT::v64f32, Custom);
+ setOperationAction(ISD::FSUB, MVT::v64f32, Custom);
+ setOperationAction(ISD::FMUL, MVT::v64f32, Custom);
+ setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
+ setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);
+
+ if (Subtarget.useHVXQFloatOps()) {
+ setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
+ } else if (Subtarget.useHVXIEEEFPOps()) {
+ setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
+ }
+
+ setOperationAction(ISD::MLOAD, MVT::v32f32, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v32f32, Custom);
+ setOperationAction(ISD::MLOAD, MVT::v64f16, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v64f16, Custom);
+ setOperationAction(ISD::MLOAD, MVT::v64f32, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v64f32, Custom);
+ }
+
for (MVT T : LegalV) {
setIndexedLoadAction(ISD::POST_INC, T, Legal);
setIndexedStoreAction(ISD::POST_INC, T, Legal);
@@ -137,6 +209,18 @@ HexagonTargetLowering::initializeHVXLowering() {
setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
}
+ if (Subtarget.useHVXQFloatOps()) {
+ setOperationAction(ISD::SINT_TO_FP, T, Expand);
+ setOperationAction(ISD::UINT_TO_FP, T, Expand);
+ setOperationAction(ISD::FP_TO_SINT, T, Expand);
+ setOperationAction(ISD::FP_TO_UINT, T, Expand);
+ } else if (Subtarget.useHVXIEEEFPOps()) {
+ setOperationAction(ISD::SINT_TO_FP, T, Custom);
+ setOperationAction(ISD::UINT_TO_FP, T, Custom);
+ setOperationAction(ISD::FP_TO_SINT, T, Custom);
+ setOperationAction(ISD::FP_TO_UINT, T, Custom);
+ }
+
setCondCodeAction(ISD::SETNE, T, Expand);
setCondCodeAction(ISD::SETLE, T, Expand);
setCondCodeAction(ISD::SETGE, T, Expand);
@@ -198,8 +282,39 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::UMIN, T, Custom);
setOperationAction(ISD::UMAX, T, Custom);
}
+
+ setOperationAction(ISD::SINT_TO_FP, T, Custom);
+ setOperationAction(ISD::UINT_TO_FP, T, Custom);
+ setOperationAction(ISD::FP_TO_SINT, T, Custom);
+ setOperationAction(ISD::FP_TO_UINT, T, Custom);
}
+ setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+
+ setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+
// Boolean vectors.
for (MVT T : LegalW) {
@@ -497,7 +612,9 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
assert(ElemSize*VecLen == HwLen);
SmallVector<SDValue,32> Words;
- if (VecTy.getVectorElementType() != MVT::i32) {
+ if (VecTy.getVectorElementType() != MVT::i32 &&
+ !(Subtarget.useHVXFloatingPoint() &&
+ VecTy.getVectorElementType() == MVT::f32)) {
assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
@@ -506,22 +623,31 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
Words.push_back(DAG.getBitcast(MVT::i32, W));
}
} else {
- Words.assign(Values.begin(), Values.end());
+ for (SDValue V : Values)
+ Words.push_back(DAG.getBitcast(MVT::i32, V));
}
+ auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
+ unsigned NumValues = Values.size();
+ assert(NumValues > 0);
+ bool IsUndef = true;
+ for (unsigned i = 0; i != NumValues; ++i) {
+ if (Values[i].isUndef())
+ continue;
+ IsUndef = false;
+ if (!SplatV.getNode())
+ SplatV = Values[i];
+ else if (SplatV != Values[i])
+ return false;
+ }
+ if (IsUndef)
+ SplatV = Values[0];
+ return true;
+ };
unsigned NumWords = Words.size();
- bool IsSplat = true, IsUndef = true;
SDValue SplatV;
- for (unsigned i = 0; i != NumWords && IsSplat; ++i) {
- if (isUndef(Words[i]))
- continue;
- IsUndef = false;
- if (!SplatV.getNode())
- SplatV = Words[i];
- else if (SplatV != Words[i])
- IsSplat = false;
- }
- if (IsUndef)
+ bool IsSplat = isSplat(Words, SplatV);
+ if (IsSplat && isUndef(SplatV))
return DAG.getUNDEF(VecTy);
if (IsSplat) {
assert(SplatV.getNode());
@@ -618,24 +744,75 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
}
}
- // Construct two halves in parallel, then or them together.
+ // Find the most common element to initialize the vector with. This avoids
+ // unnecessary vinsert/valign for cases where the same value is present
+ // many times. Creates a histogram of the vector's elements to find the
+ // most common element n.
assert(4*Words.size() == Subtarget.getVectorLength());
- SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
- SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
- SDValue S = DAG.getConstant(4, dl, MVT::i32);
+ int VecHist[32];
+ int n = 0;
+ for (unsigned i = 0; i != NumWords; ++i) {
+ VecHist[i] = 0;
+ if (Words[i].isUndef())
+ continue;
+ for (unsigned j = i; j != NumWords; ++j)
+ if (Words[i] == Words[j])
+ VecHist[i]++;
+
+ if (VecHist[i] > VecHist[n])
+ n = i;
+ }
+
+ SDValue HalfV = getZero(dl, VecTy, DAG);
+ if (VecHist[n] > 1) {
+ SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
+ HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
+ {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
+ }
+ SDValue HalfV0 = HalfV;
+ SDValue HalfV1 = HalfV;
+
+ // Construct two halves in parallel, then or them together. Rn and Rm count
+ // the number of rotations needed before the next element. One last rotation is
+ // performed post-loop to position the last element.
+ int Rn = 0, Rm = 0;
+ SDValue Sn, Sm;
+ SDValue N = HalfV0;
+ SDValue M = HalfV1;
for (unsigned i = 0; i != NumWords/2; ++i) {
- SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
- {HalfV0, Words[i]});
- SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
- {HalfV1, Words[i+NumWords/2]});
- HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S});
- HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S});
+
+ // Rotate by element count since last insertion.
+ if (Words[i] != Words[n] || VecHist[n] <= 1) {
+ Sn = DAG.getConstant(Rn, dl, MVT::i32);
+ HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
+ N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
+ {HalfV0, Words[i]});
+ Rn = 0;
+ }
+ if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
+ Sm = DAG.getConstant(Rm, dl, MVT::i32);
+ HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
+ M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
+ {HalfV1, Words[i+NumWords/2]});
+ Rm = 0;
+ }
+ Rn += 4;
+ Rm += 4;
}
+ // Perform last rotation.
+ Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
+ Sm = DAG.getConstant(Rm, dl, MVT::i32);
+ HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
+ HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
+
+ SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
+ SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
+
+ SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
- HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy,
- {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)});
- SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1});
- return DstV;
+ SDValue OutV =
+ DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
+ return OutV;
}
SDValue
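The rewritten buildHvxVectorReg above first builds a small histogram to find the most frequently occurring word, pre-fills both half-vectors with a splat of it, and then only inserts (with the VROR rotations tracked by Rn/Rm) the words that differ from it. A standalone sketch of the histogram step, with undef slots modelled as std::nullopt (illustrative, not the DAG code):

```cpp
#include <cstddef>
#include <optional>
#include <vector>

// Count how often each word occurs (ignoring undef slots) and return the index
// of the most frequent one. Assumes a non-empty input, as the DAG code does.
static std::size_t mostCommonIndex(const std::vector<std::optional<int>> &Words) {
  std::vector<int> Hist(Words.size(), 0);
  std::size_t Best = 0;
  for (std::size_t I = 0; I != Words.size(); ++I) {
    if (!Words[I])
      continue;
    for (std::size_t J = I; J != Words.size(); ++J)
      if (Words[J] && *Words[J] == *Words[I])
        ++Hist[I];
    if (Hist[I] > Hist[Best])
      Best = I;
  }
  return Best;
}
```

Splatting Words[Best] up front (when it occurs more than once) is what lets the per-half rotation counters skip the lanes that already hold the right value.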
@@ -1237,6 +1414,19 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
if (VecTy.getVectorElementType() == MVT::i1)
return buildHvxVectorPred(Ops, dl, VecTy, DAG);
+ // For an MVT::f16 BUILD_VECTOR, MVT::f16 is not a legal type, so
+ // build the node with the operands bitcast to i16 and bitcast the
+ // result back to f16.
+ if (VecTy.getVectorElementType() == MVT::f16) {
+ SmallVector<SDValue,64> NewOps;
+ for (unsigned i = 0; i != Size; i++)
+ NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
+
+ SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ tyVector(VecTy, MVT::i16), NewOps);
+ return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
+ }
+
if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
ArrayRef<SDValue> A(Ops);
MVT SingleTy = typeSplit(VecTy).first;
@@ -1249,6 +1439,24 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
}
SDValue
+HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
+ const {
+ const SDLoc &dl(Op);
+ MVT VecTy = ty(Op);
+ MVT ArgTy = ty(Op.getOperand(0));
+
+ if (ArgTy == MVT::f16) {
+ MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
+ SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
+ SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
+ SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
+ return DAG.getBitcast(VecTy, Splat);
+ }
+
+ return SDValue();
+}
+
+SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
const {
// Vector concatenation of two integer (non-bool) vectors does not need
@@ -1363,6 +1571,7 @@ SDValue
HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
const {
const SDLoc &dl(Op);
+ MVT VecTy = ty(Op);
SDValue VecV = Op.getOperand(0);
SDValue ValV = Op.getOperand(1);
SDValue IdxV = Op.getOperand(2);
@@ -1370,6 +1579,14 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
if (ElemTy == MVT::i1)
return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
+ if (ElemTy == MVT::f16) {
+ SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ tyVector(VecTy, MVT::i16),
+ DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
+ DAG.getBitcast(MVT::i16, ValV), IdxV);
+ return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
+ }
+
return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
}
@@ -1800,6 +2017,80 @@ HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
+SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
+ SelectionDAG &DAG) const {
+ // This conversion only applies to QFloat.
+ assert(Subtarget.useHVXQFloatOps());
+
+ assert(Op->getOpcode() == ISD::FP_EXTEND);
+
+ MVT VecTy = ty(Op);
+ MVT ArgTy = ty(Op.getOperand(0));
+ const SDLoc &dl(Op);
+ assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
+
+ SDValue F16Vec = Op.getOperand(0);
+
+ APFloat FloatVal = APFloat(1.0f);
+ bool Ignored;
+ FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+ SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
+ SDValue VmpyVec =
+ getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
+
+ MVT HalfTy = typeSplit(VecTy).first;
+ VectorPair Pair = opSplit(VmpyVec, dl, DAG);
+ SDValue LoVec =
+ getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
+ SDValue HiVec =
+ getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
+
+ SDValue ShuffVec =
+ getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
+ {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG);
+
+ return ShuffVec;
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG)
+ const {
+ // This conversion only applies to IEEE.
+ assert(Subtarget.useHVXIEEEFPOps());
+
+ unsigned Opc = Op.getOpcode();
+ // Catch invalid conversion ops (just in case).
+ assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
+ Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
+ MVT ResTy = ty(Op);
+
+ if (Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT) {
+ MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
+ // Only f16 conversions are supported here.
+ if (FpTy != MVT::f16)
+ return SDValue();
+
+ MVT IntTy = ResTy.getVectorElementType();
+ // Other int types aren't legal in HVX, so we shouldn't see them here.
+ assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
+ // Conversions to i8 and i16 are legal.
+ if (IntTy == MVT::i8 || IntTy == MVT::i16)
+ return Op;
+ } else {
+ // Converting int -> fp.
+ if (ResTy.getVectorElementType() != MVT::f16)
+ return SDValue();
+ MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
+ // Other int types aren't legal in HVX, so we shouldn't see them here.
+ assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
+ // i8, i16 -> f16 is legal.
+ if (IntTy == MVT::i8 || IntTy == MVT::i16)
+ return Op;
+ }
+
+ return SDValue();
+}
+
SDValue
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
assert(!Op.isMachineOpcode());
@@ -2104,10 +2395,22 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MLOAD:
case ISD::MSTORE:
return SplitHvxMemOp(Op, DAG);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
+ return SplitHvxPairOp(Op, DAG);
+ break;
case ISD::CTPOP:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::MUL:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
case ISD::MULHS:
case ISD::MULHU:
case ISD::AND:
@@ -2134,6 +2437,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
default:
break;
case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
+ case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
@@ -2158,6 +2462,11 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
// Unaligned loads will be handled by the default lowering.
case ISD::LOAD: return SDValue();
+ case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerHvxConvertFpInt(Op, DAG);
}
#ifndef NDEBUG
Op.dumpr(&DAG);
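Several of the f16 hunks in this file (LowerHvxBuildVector, LowerHvxInsertElement, LowerHvxSplatVector) use the same trick: because f16 is not a legal scalar type, the operation is carried out on the identical i16 bit pattern and the result is merely bitcast back to f16. A loose scalar analogue of the splat variant (illustrative only; the DAG code works on whole HVX vectors):

```cpp
#include <cstdint>

// Replicate a half-precision bit pattern across a 32-bit word: the f16 payload
// travels as raw bits and is only re-labelled as f16 (a bitcast) at the end.
inline uint32_t splatHalfIntoWord(uint16_t F16Bits) {
  return (uint32_t(F16Bits) << 16) | F16Bits; // two f16 lanes per 32-bit word
}
```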
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 931b0c0e0090..9b4e92a16663 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -146,6 +146,48 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
return Count;
}
+// Check if the A2_tfrsi instruction is cheap or not. If the operand has
+// to be constant-extended, it is not cheap since it occupies two slots
+// in a packet.
+bool HexagonInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
+ // Enable the following steps only at Os/Oz
+ if (!(MI.getMF()->getFunction().hasOptSize()))
+ return MI.isAsCheapAsAMove();
+
+ if (MI.getOpcode() == Hexagon::A2_tfrsi) {
+ auto Op = MI.getOperand(1);
+ // If the instruction has a global address as operand, it is not cheap
+ // since the operand will be constant extended.
+ if (Op.getType() == MachineOperand::MO_GlobalAddress)
+ return false;
+ // If the instruction has an immediate operand wider than 16 bits, it will
+ // be constant-extended and hence is not cheap.
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm();
+ if (!isInt<16>(Imm))
+ return false;
+ }
+ }
+ return MI.isAsCheapAsAMove();
+}
+
+// Do not sink floating point instructions that update the USR register.
+// Example:
+// feclearexcept
+// F2_conv_w2sf
+// fetestexcept
+// MachineSink sinks F2_conv_w2sf and we are not able to catch exceptions.
+// TODO: On some of these floating point instructions, USR is marked as Use.
+// In reality, these instructions also Def the USR. If USR is marked as Def,
+// some of the assumptions in assembler packetization are broken.
+bool HexagonInstrInfo::shouldSink(const MachineInstr &MI) const {
+ // Assumption: A floating point instruction that reads the USR will write
+ // the USR as well.
+ if (isFloat(MI) && MI.hasRegisterImplicitUseOperand(Hexagon::USR))
+ return false;
+ return true;
+}
+
/// Find the hardware loop instruction used to set-up the specified loop.
/// On Hexagon, we have two instructions used to set-up the hardware loop
/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
@@ -1464,75 +1506,75 @@ HexagonInstrInfo::expandVGatherPseudo(MachineInstr &MI) const {
switch (Opc) {
case Hexagon::V6_vgathermh_pseudo:
First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh))
- .add(MI.getOperand(1))
.add(MI.getOperand(2))
- .add(MI.getOperand(3));
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
- .addImm(0)
+ .addImm(MI.getOperand(1).getImm())
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return First.getInstrIterator();
case Hexagon::V6_vgathermw_pseudo:
First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw))
- .add(MI.getOperand(1))
.add(MI.getOperand(2))
- .add(MI.getOperand(3));
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
- .addImm(0)
+ .addImm(MI.getOperand(1).getImm())
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return First.getInstrIterator();
case Hexagon::V6_vgathermhw_pseudo:
First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw))
- .add(MI.getOperand(1))
.add(MI.getOperand(2))
- .add(MI.getOperand(3));
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
- .addImm(0)
+ .addImm(MI.getOperand(1).getImm())
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return First.getInstrIterator();
case Hexagon::V6_vgathermhq_pseudo:
First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq))
- .add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
- .add(MI.getOperand(4));
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
- .addImm(0)
+ .addImm(MI.getOperand(1).getImm())
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return First.getInstrIterator();
case Hexagon::V6_vgathermwq_pseudo:
First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq))
- .add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
- .add(MI.getOperand(4));
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
- .addImm(0)
+ .addImm(MI.getOperand(1).getImm())
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return First.getInstrIterator();
case Hexagon::V6_vgathermhwq_pseudo:
First = BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq))
- .add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
- .add(MI.getOperand(4));
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai))
.add(MI.getOperand(0))
- .addImm(0)
+ .addImm(MI.getOperand(1).getImm())
.addReg(Hexagon::VTMP);
MBB.erase(MI);
return First.getInstrIterator();
@@ -1851,6 +1893,7 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
case Hexagon::C4_cmplte:
case Hexagon::C4_cmplteu:
SrcReg2 = MI.getOperand(2).getReg();
+ Value = 0;
return true;
case Hexagon::C2_cmpeqi:
@@ -2725,7 +2768,13 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::V6_vL32b_nt_ai:
case Hexagon::V6_vS32b_nt_ai:
case Hexagon::V6_vL32Ub_ai:
- case Hexagon::V6_vS32Ub_ai: {
+ case Hexagon::V6_vS32Ub_ai:
+ case Hexagon::V6_vgathermh_pseudo:
+ case Hexagon::V6_vgathermw_pseudo:
+ case Hexagon::V6_vgathermhw_pseudo:
+ case Hexagon::V6_vgathermhq_pseudo:
+ case Hexagon::V6_vgathermwq_pseudo:
+ case Hexagon::V6_vgathermhwq_pseudo: {
unsigned VectorSize = TRI->getSpillSize(Hexagon::HvxVRRegClass);
assert(isPowerOf2_32(VectorSize));
if (Offset & (VectorSize-1))
@@ -2751,6 +2800,11 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::S4_storeirit_io:
case Hexagon::S4_storeirif_io:
return isShiftedUInt<6,2>(Offset);
+ // Handle these two compare instructions that are not extendable.
+ case Hexagon::A4_cmpbeqi:
+ return isUInt<8>(Offset);
+ case Hexagon::A4_cmpbgti:
+ return isInt<8>(Offset);
}
if (Extend)
@@ -2788,6 +2842,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::L4_isub_memopw_io:
case Hexagon::L4_add_memopw_io:
case Hexagon::L4_sub_memopw_io:
+ case Hexagon::L4_iand_memopw_io:
+ case Hexagon::L4_ior_memopw_io:
case Hexagon::L4_and_memopw_io:
case Hexagon::L4_or_memopw_io:
return (0 <= Offset && Offset <= 255);
@@ -2796,6 +2852,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::L4_isub_memoph_io:
case Hexagon::L4_add_memoph_io:
case Hexagon::L4_sub_memoph_io:
+ case Hexagon::L4_iand_memoph_io:
+ case Hexagon::L4_ior_memoph_io:
case Hexagon::L4_and_memoph_io:
case Hexagon::L4_or_memoph_io:
return (0 <= Offset && Offset <= 127);
@@ -2804,6 +2862,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::L4_isub_memopb_io:
case Hexagon::L4_add_memopb_io:
case Hexagon::L4_sub_memopb_io:
+ case Hexagon::L4_iand_memopb_io:
+ case Hexagon::L4_ior_memopb_io:
case Hexagon::L4_and_memopb_io:
case Hexagon::L4_or_memopb_io:
return (0 <= Offset && Offset <= 63);
@@ -2848,8 +2908,18 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::S2_pstorerdt_io:
case Hexagon::S2_pstorerdf_io:
return isShiftedUInt<6,3>(Offset);
+
+ case Hexagon::L2_loadbsw2_io:
+ case Hexagon::L2_loadbzw2_io:
+ return isShiftedInt<11,1>(Offset);
+
+ case Hexagon::L2_loadbsw4_io:
+ case Hexagon::L2_loadbzw4_io:
+ return isShiftedInt<11,2>(Offset);
} // switch
+ dbgs() << "Failed Opcode is : " << Opcode << " (" << getName(Opcode)
+ << ")\n";
llvm_unreachable("No offset range is defined for this opcode. "
"Please define it in the above switch statement!");
}
@@ -3486,9 +3556,9 @@ int HexagonInstrInfo::getDuplexOpcode(const MachineInstr &MI,
if (Iter != DupMap.end())
return Iter->second;
} else { // Conversion to Tiny core.
- for (auto Iter = DupMap.begin(), End = DupMap.end(); Iter != End; ++Iter)
- if (Iter->second == OpNum)
- return Iter->first;
+ for (const auto &Iter : DupMap)
+ if (Iter.second == OpNum)
+ return Iter.first;
}
return -1;
}
@@ -3516,6 +3586,10 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
return Hexagon::V6_vL32b_nt_cur_pi;
case Hexagon::V6_vL32b_nt_ai:
return Hexagon::V6_vL32b_nt_cur_ai;
+ case Hexagon::V6_vL32b_ppu:
+ return Hexagon::V6_vL32b_cur_ppu;
+ case Hexagon::V6_vL32b_nt_ppu:
+ return Hexagon::V6_vL32b_nt_cur_ppu;
}
return 0;
}
@@ -3532,6 +3606,10 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const {
return Hexagon::V6_vL32b_nt_pi;
case Hexagon::V6_vL32b_nt_cur_ai:
return Hexagon::V6_vL32b_nt_ai;
+ case Hexagon::V6_vL32b_cur_ppu:
+ return Hexagon::V6_vL32b_ppu;
+ case Hexagon::V6_vL32b_nt_cur_ppu:
+ return Hexagon::V6_vL32b_nt_ppu;
}
return 0;
}
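The new isValidOffset cases above (and the isInt<16> check in isAsCheapAsAMove) are all expressed through LLVM's integer-range predicates. The sketch below restates their semantics with local helpers (assumed to mirror the MathExtras definitions; not code from this patch): isShiftedInt<11,1> for L2_loadbsw2_io, for instance, means an even offset in [-2048, 2046].

```cpp
#include <cstdint>

// Local restatement of the range predicates used in isValidOffset.
template <unsigned N> constexpr bool isIntN(int64_t X) {
  return N >= 64 || (X >= -(int64_t(1) << (N - 1)) && X < (int64_t(1) << (N - 1)));
}
template <unsigned N> constexpr bool isUIntN(uint64_t X) {
  return N >= 64 || X < (uint64_t(1) << N);
}
// A signed (N+S)-bit value whose low S bits are zero, i.e. an N-bit signed
// count of 2^S-byte units.
template <unsigned N, unsigned S> constexpr bool isShiftedIntN(int64_t X) {
  return isIntN<N + S>(X) && (X % (int64_t(1) << S)) == 0;
}

// L2_loadbsw2_io / L2_loadbzw2_io: even offsets in [-2048, 2046].
static_assert(isShiftedIntN<11, 1>(2046) && !isShiftedIntN<11, 1>(2047), "");
// A4_cmpbeqi takes an unsigned 8-bit immediate, A4_cmpbgti a signed one.
static_assert(isUIntN<8>(255) && !isUIntN<8>(256) && isIntN<8>(-128), "");
```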
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 830f04d9eac3..2af09c857d86 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -335,6 +335,13 @@ public:
getSerializableBitmaskMachineOperandTargetFlags() const override;
bool isTailCall(const MachineInstr &MI) const override;
+ bool isAsCheapAsAMove(const MachineInstr &MI) const override;
+
+ // Return true if the instruction should be sunk by MachineSink.
+ // MachineSink determines on its own whether the instruction is safe to sink;
+ // this gives the target a hook to override the default behavior with regard
+ // to which instructions should be sunk.
+ bool shouldSink(const MachineInstr &MI) const override;
/// HexagonInstrInfo specifics.
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 2cdfbe7845b6..ea6a7498e27f 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -110,6 +110,8 @@ private:
bool changeAddAsl(NodeAddr<UseNode *> AddAslUN, MachineInstr *AddAslMI,
const MachineOperand &ImmOp, unsigned ImmOpNum);
bool isValidOffset(MachineInstr *MI, int Offset);
+ unsigned getBaseOpPosition(MachineInstr *MI);
+ unsigned getOffsetOpPosition(MachineInstr *MI);
};
} // end anonymous namespace
@@ -322,6 +324,25 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
}
bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) {
+ if (HII->isHVXVec(*MI)) {
+ // Only HVX vgather instructions are handled for now.
+ // TODO: Extend the pass to other vector load/store operations.
+ switch (MI->getOpcode()) {
+ case Hexagon::V6_vgathermh_pseudo:
+ case Hexagon::V6_vgathermw_pseudo:
+ case Hexagon::V6_vgathermhw_pseudo:
+ case Hexagon::V6_vgathermhq_pseudo:
+ case Hexagon::V6_vgathermwq_pseudo:
+ case Hexagon::V6_vgathermhwq_pseudo:
+ return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false);
+ default:
+ return false;
+ }
+ }
+
+ if (HII->getAddrMode(*MI) != HexagonII::BaseImmOffset)
+ return false;
+
unsigned AlignMask = 0;
switch (HII->getMemAccessSize(*MI)) {
case HexagonII::MemAccessSize::DoubleWordAccess:
@@ -345,29 +366,67 @@ bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) {
return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false);
}
+unsigned HexagonOptAddrMode::getBaseOpPosition(MachineInstr *MI) {
+ const MCInstrDesc &MID = MI->getDesc();
+ switch (MI->getOpcode()) {
+ // vgather pseudos are both mayLoad and mayStore, so the Base and Offset
+ // operand positions must be specified explicitly.
+ case Hexagon::V6_vgathermh_pseudo:
+ case Hexagon::V6_vgathermw_pseudo:
+ case Hexagon::V6_vgathermhw_pseudo:
+ case Hexagon::V6_vgathermhq_pseudo:
+ case Hexagon::V6_vgathermwq_pseudo:
+ case Hexagon::V6_vgathermhwq_pseudo:
+ return 0;
+ default:
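+ // For other loads the base register follows the destination (operand 1);
+ // for stores it is operand 0.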
+ return MID.mayLoad() ? 1 : 0;
+ }
+}
+
+unsigned HexagonOptAddrMode::getOffsetOpPosition(MachineInstr *MI) {
+ assert(
+ (HII->getAddrMode(*MI) == HexagonII::BaseImmOffset) &&
+ "Looking for an offset in non-BaseImmOffset addressing mode instruction");
+
+ const MCInstrDesc &MID = MI->getDesc();
+ switch (MI->getOpcode()) {
+ // vgather pseudos are both mayLoad and mayStore, so the Base and Offset
+ // operand positions must be specified explicitly.
+ case Hexagon::V6_vgathermh_pseudo:
+ case Hexagon::V6_vgathermw_pseudo:
+ case Hexagon::V6_vgathermhw_pseudo:
+ case Hexagon::V6_vgathermhq_pseudo:
+ case Hexagon::V6_vgathermwq_pseudo:
+ case Hexagon::V6_vgathermhwq_pseudo:
+ return 1;
+ default:
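+ // The immediate offset always immediately follows the base register operand.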
+ return MID.mayLoad() ? 2 : 1;
+ }
+}
+
bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
MachineInstr *AddMI,
const NodeList &UNodeList) {
Register AddDefR = AddMI->getOperand(0).getReg();
+ Register BaseReg = AddMI->getOperand(1).getReg();
for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
NodeAddr<UseNode *> UN = *I;
NodeAddr<StmtNode *> SN = UN.Addr->getOwner(*DFG);
MachineInstr *MI = SN.Addr->getCode();
const MCInstrDesc &MID = MI->getDesc();
if ((!MID.mayLoad() && !MID.mayStore()) ||
- HII->getAddrMode(*MI) != HexagonII::BaseImmOffset ||
- HII->isHVXVec(*MI))
+ HII->getAddrMode(*MI) != HexagonII::BaseImmOffset)
return false;
- MachineOperand BaseOp = MID.mayLoad() ? MI->getOperand(1)
- : MI->getOperand(0);
+ MachineOperand BaseOp = MI->getOperand(getBaseOpPosition(MI));
if (!BaseOp.isReg() || BaseOp.getReg() != AddDefR)
return false;
- MachineOperand OffsetOp = MID.mayLoad() ? MI->getOperand(2)
- : MI->getOperand(1);
+ MachineOperand OffsetOp = MI->getOperand(getOffsetOpPosition(MI));
if (!OffsetOp.isImm())
return false;
@@ -382,11 +441,19 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
// Ex: Rx= add(Rt,#10)
// memw(Rx+#0) = Rs
// will be replaced with => memw(Rt+#10) = Rs
- Register BaseReg = AddMI->getOperand(1).getReg();
if (!isSafeToExtLR(AddSN, AddMI, BaseReg, UNodeList))
return false;
}
+ NodeId LRExtRegRD = 0;
+ // Iterate through all the UseNodes in AddSN and find the reaching def
+ // for the base register (LRExtReg).
+ for (NodeAddr<UseNode *> UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) {
+ RegisterRef RR = UA.Addr->getRegRef(*DFG);
+ if (BaseReg == RR.Reg)
+ LRExtRegRD = UA.Addr->getReachingDef();
+ }
+
// Update all the uses of 'add' with the appropriate base and offset
// values.
bool Changed = false;
@@ -400,6 +467,12 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
LLVM_DEBUG(dbgs() << "\t\t[MI <BB#" << UseMI->getParent()->getNumber()
<< ">]: " << *UseMI << "\n");
Changed |= updateAddUses(AddMI, UseMI);
+
+ // Set the reaching def for the UseNode under consideration after
+ // updating the Add use. This local change avoids rebuilding the RDF
+ // graph after the update.
+ NodeAddr<DefNode *> LRExtRegDN = DFG->addr<DefNode *>(LRExtRegRD);
+ UseN.Addr->linkToDef(UseN.Id, LRExtRegDN);
}
if (Changed)
@@ -409,21 +482,18 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
}
bool HexagonOptAddrMode::updateAddUses(MachineInstr *AddMI,
- MachineInstr *UseMI) {
+ MachineInstr *UseMI) {
const MachineOperand ImmOp = AddMI->getOperand(2);
const MachineOperand AddRegOp = AddMI->getOperand(1);
- Register newReg = AddRegOp.getReg();
- const MCInstrDesc &MID = UseMI->getDesc();
+ Register NewReg = AddRegOp.getReg();
- MachineOperand &BaseOp = MID.mayLoad() ? UseMI->getOperand(1)
- : UseMI->getOperand(0);
- MachineOperand &OffsetOp = MID.mayLoad() ? UseMI->getOperand(2)
- : UseMI->getOperand(1);
- BaseOp.setReg(newReg);
+ MachineOperand &BaseOp = UseMI->getOperand(getBaseOpPosition(UseMI));
+ MachineOperand &OffsetOp = UseMI->getOperand(getOffsetOpPosition(UseMI));
+ BaseOp.setReg(NewReg);
BaseOp.setIsUndef(AddRegOp.isUndef());
BaseOp.setImplicit(AddRegOp.isImplicit());
OffsetOp.setImm(ImmOp.getImm() + OffsetOp.getImm());
- MRI->clearKillFlags(newReg);
+ MRI->clearKillFlags(NewReg);
return true;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index cad5ca8ab92e..3abbd896c519 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -87,18 +87,6 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
-def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>;
-def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>;
-def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>;
-
-def HVI8: PatLeaf<(VecI8 HvxVR:$R)>;
-def HVI16: PatLeaf<(VecI16 HvxVR:$R)>;
-def HVI32: PatLeaf<(VecI32 HvxVR:$R)>;
-
-def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
-def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
-def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
-
def SDTVecLeaf:
SDTypeProfile<1, 0, [SDTCisVec<0>]>;
def SDTVecVecIntOp:
@@ -269,6 +257,9 @@ def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>;
def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
+def f32zero: PatLeaf<(f32 fpimm:$F), [{
+ return N->isExactlyValue(APFloat::getZero(APFloat::IEEEsingle(), false));
+}]>;
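+// Matches only positive single-precision zero (+0.0).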
// This complex pattern is really only to detect various forms of
// sign-extension i32->i64. The selected value will be of type i64
@@ -378,6 +369,12 @@ def Umin: pf2<umin>; def Umax: pf2<umax>;
def Rol: pf2<rotl>;
+def Fptosi: pf1<fp_to_sint>;
+def Fptoui: pf1<fp_to_uint>;
+def Sitofp: pf1<sint_to_fp>;
+def Uitofp: pf1<uint_to_fp>;
+
+
// --(1) Immediate -------------------------------------------------------
//
@@ -2083,7 +2080,7 @@ let AddedComplexity = 20 in {
defm: Loadxi_pat<sextloadi8, i32, anyimm0, L2_loadrb_io>;
defm: Loadxi_pat<sextloadi16, i32, anyimm1, L2_loadrh_io>;
defm: Loadxi_pat<sextloadv2i8, v2i16, anyimm1, L2_loadbsw2_io>;
- defm: Loadxi_pat<sextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
+ defm: Loadxi_pat<sextloadv4i8, v4i16, anyimm2, L2_loadbsw4_io>;
defm: Loadxi_pat<zextloadi1, i32, anyimm0, L2_loadrub_io>;
defm: Loadxi_pat<zextloadi8, i32, anyimm0, L2_loadrub_io>;
defm: Loadxi_pat<zextloadi16, i32, anyimm1, L2_loadruh_io>;
@@ -2135,7 +2132,7 @@ let AddedComplexity = 60 in {
def: Loadxu_pat<sextloadi8, i32, anyimm0, L4_loadrb_ur>;
def: Loadxu_pat<sextloadi16, i32, anyimm1, L4_loadrh_ur>;
def: Loadxu_pat<sextloadv2i8, v2i16, anyimm1, L4_loadbsw2_ur>;
- def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
+ def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbsw4_ur>;
def: Loadxu_pat<zextloadi1, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>;
def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index a22a3f8ec0ca..0a3dff057ccd 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -6,6 +6,21 @@
//
//===----------------------------------------------------------------------===//
+def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>;
+def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>;
+def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>;
+
+def HVI8: PatLeaf<(VecI8 HvxVR:$R)>;
+def HVI16: PatLeaf<(VecI16 HvxVR:$R)>;
+def HVI32: PatLeaf<(VecI32 HvxVR:$R)>;
+def HVF16: PatLeaf<(VecF16 HvxVR:$R)>;
+def HVF32: PatLeaf<(VecF32 HvxVR:$R)>;
+
+def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
+def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
+def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
+def HWF16: PatLeaf<(VecPF16 HvxWR:$R)>;
+def HWF32: PatLeaf<(VecPF32 HvxWR:$R)>;
def SDTVecUnaryOp:
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
@@ -49,7 +64,7 @@ def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>;
def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>;
def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;
-def vzero: PatFrag<(ops), (splat_vector (i32 0))>;
+def vzero: PatFrags<(ops), [(splat_vector (i32 0)), (splat_vector (f32zero))]>;
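+// vzero now also matches a floating-point zero splat, so the zero-vector
+// patterns below cover the HVX floating-point types as well.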
def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
def qcat: PatFrag<(ops node:$Qs, node:$Qt),
@@ -150,12 +165,19 @@ let Predicates = [UseHVX] in {
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI8, IsVecOff>;
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI16, IsVecOff>;
defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI32, IsVecOff>;
-
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI8, IsVecOff>;
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI16, IsVecOff>;
defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI32, IsVecOff>;
}
+let Predicates = [UseHVXV68] in {
+ defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>;
+ defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>;
+ defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF16, IsVecOff>;
+ defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF32, IsVecOff>;
+ defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF16, IsVecOff>;
+ defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF32, IsVecOff>;
+}
// HVX stores
@@ -199,6 +221,15 @@ let Predicates = [UseHVX] in {
defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVI32, IsVecOff>;
}
+let Predicates = [UseHVXV68] in {
+ defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF16, IsVecOff>;
+ defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF32, IsVecOff>;
+ defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF16, IsVecOff>;
+ defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF32, IsVecOff>;
+ defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF16, IsVecOff>;
+ defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF32, IsVecOff>;
+}
+
// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
let Predicates = [UseHVX] in {
@@ -211,6 +242,24 @@ let Predicates = [UseHVX] in {
defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}
+let Predicates = [UseHVX, UseHVXFloatingPoint] in {
+ defm: NopCast_pat<VecI8, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI8, VecF32, HvxVR>;
+ defm: NopCast_pat<VecI16, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI16, VecF32, HvxVR>;
+ defm: NopCast_pat<VecI32, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI32, VecF32, HvxVR>;
+ defm: NopCast_pat<VecF16, VecF32, HvxVR>;
+
+ defm: NopCast_pat<VecPI8, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI8, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPI16, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI16, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPI32, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI32, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPF16, VecPF32, HvxWR>;
+}
+
let Predicates = [UseHVX] in {
let AddedComplexity = 100 in {
// These should be preferred over a vsplat of 0.
@@ -220,6 +269,7 @@ let Predicates = [UseHVX] in {
def: Pat<(VecPI8 vzero), (PS_vdd0)>;
def: Pat<(VecPI16 vzero), (PS_vdd0)>;
def: Pat<(VecPI32 vzero), (PS_vdd0)>;
+ def: Pat<(VecPF32 vzero), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>;
def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
@@ -251,6 +301,28 @@ let Predicates = [UseHVX] in {
(V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}
+let Predicates = [UseHVX, UseHVXFloatingPoint] in {
+ let AddedComplexity = 100 in {
+ def: Pat<(VecF16 vzero), (V6_vd0)>;
+ def: Pat<(VecF32 vzero), (V6_vd0)>;
+ def: Pat<(VecPF16 vzero), (PS_vdd0)>;
+ def: Pat<(VecPF32 vzero), (PS_vdd0)>;
+
+ def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>;
+ def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>;
+ }
+
+ def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+
+ def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+ def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+}
+
// Splats for HvxV60
def V60splatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>;
def V60splatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>;
@@ -307,6 +379,18 @@ let Predicates = [UseHVX,UseHVXV62] in {
def: Pat<(VecPI32 (splat_vector I32:$Rs)), (Rep (V62splatrw $Rs))>;
}
}
+let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
+ let AddedComplexity = 30 in {
+ def: Pat<(VecF16 (splat_vector u16_0ImmPred:$V)), (V62splatih imm:$V)>;
+ def: Pat<(VecF32 (splat_vector anyint:$V)), (V62splatiw imm:$V)>;
+ def: Pat<(VecF32 (splat_vector f32ImmPred:$V)), (V62splatiw (ftoi $V))>;
+ }
+ let AddedComplexity = 20 in {
+ def: Pat<(VecF16 (splat_vector I32:$Rs)), (V62splatrh $Rs)>;
+ def: Pat<(VecF32 (splat_vector I32:$Rs)), (V62splatrw $Rs)>;
+ def: Pat<(VecF32 (splat_vector F32:$Rs)), (V62splatrw $Rs)>;
+ }
+}
class Vneg1<ValueType VecTy>
: PatFrag<(ops), (VecTy (splat_vector (i32 -1)))>;
@@ -369,6 +453,107 @@ let Predicates = [UseHVX] in {
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
+// For now, we always deal with vector floating point in SF mode.
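+// The classes below wrap the qfloat result of MI with a conversion back to
+// the IEEE hf/sf value that the pattern produces.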
+class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
+ PatFrag RsPred, PatFrag RtPred = RsPred>
+ : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
+ (V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>;
+
+class OpR_RR_pat_conv_hf<InstHexagon MI, PatFrag Op, ValueType ResType,
+ PatFrag RsPred, PatFrag RtPred = RsPred>
+ : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
+ (V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>;
+
+let Predicates = [UseHVXV68, UseHVXQFloat] in {
+ def: OpR_RR_pat_conv_hf<V6_vsub_hf, pf2<fsub>, VecF16, HVF16>;
+ def: OpR_RR_pat_conv_hf<V6_vadd_hf, pf2<fadd>, VecF16, HVF16>;
+ def: OpR_RR_pat_conv_hf<V6_vmpy_qf16_hf, pf2<fmul>, VecF16, HVF16>;
+ def: OpR_RR_pat_conv<V6_vsub_sf, pf2<fsub>, VecF32, HVF32>;
+ def: OpR_RR_pat_conv<V6_vadd_sf, pf2<fadd>, VecF32, HVF32>;
+ def: OpR_RR_pat_conv<V6_vmpy_qf32_sf, pf2<fmul>, VecF32, HVF32>;
+
+ // For now we assume that the fp32 register always comes in as an IEEE
+ // float, since the qfloat arithmetic instructions above always generate
+ // the accompanying conversions as part of their patterns.
+ def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
+ (V6_vdealh (V6_vconv_hf_qf32
+ (VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)),
+ (V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0))
+ ))))>;
+ // fpextend for QFloat is handled manually in HexagonISelLoweringHVX.cpp.
+}
+
+// HVX IEEE arithmetic Instructions
+let Predicates = [UseHVXV68, UseHVXIEEEFP] in {
+ def: Pat<(fadd HVF16:$Rs, HVF16:$Rt),
+ (V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>;
+ def: Pat<(fadd HVF32:$Rs, HVF32:$Rt),
+ (V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>;
+ def: Pat<(fsub HVF16:$Rs, HVF16:$Rt),
+ (V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>;
+ def: Pat<(fsub HVF32:$Rs, HVF32:$Rt),
+ (V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>;
+ def: Pat<(fmul HVF16:$Rs, HVF16:$Rt),
+ (V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>;
+ def: Pat<(fmul HVF32:$Rs, HVF32:$Rt),
+ (V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>;
+
+ def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
+ (V6_vdealh (V6_vcvt_hf_sf (HiVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)))>;
+ def: Pat<(VecPF32 (pf1<fpextend> HVF16:$Vu)),
+ (V6_vcvt_sf_hf (V6_vshuffh HvxVR:$Vu))>;
+
+ def: OpR_R_pat<V6_vcvt_h_hf, Fptosi, VecI16, HVF16>;
+ def: OpR_R_pat<V6_vcvt_uh_hf, Fptoui, VecI16, HVF16>;
+ def: OpR_R_pat<V6_vcvt_hf_h, Sitofp, VecF16, HVI16>;
+ def: OpR_R_pat<V6_vcvt_hf_uh, Uitofp, VecF16, HVI16>;
+
+ def: Pat<(VecI8 (Fptosi HWF16:$Vu)),
+ (V6_vcvt_b_hf (HiVec $Vu), (LoVec $Vu))>;
+ def: Pat<(VecI8 (Fptoui HWF16:$Vu)),
+ (V6_vcvt_ub_hf (HiVec $Vu), (LoVec $Vu))>;
+ def: Pat<(VecPF16 (Sitofp HVI8:$Vu)), (V6_vcvt_hf_b HvxVR:$Vu)>;
+ def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>;
+}
+
+let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
+ def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+
+ def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+}
+
+let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in {
+ let AddedComplexity = 220 in {
+ defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setgt, VecQ16, HVF16>;
+ defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setogt, VecQ16, HVF16>;
+ defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setgt, VecQ32, HVF32>;
+ defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setogt, VecQ32, HVF32>;
+ }
+ def: OpR_RR_pat<V6_vmin_hf, pf2<fminnum>, VecF16, HVF16>;
+ def: OpR_RR_pat<V6_vmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
+ def: OpR_RR_pat<V6_vmin_sf, pf2<fminnum>, VecF32, HVF32>;
+ def: OpR_RR_pat<V6_vmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
+}
+
+let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in {
+ let AddedComplexity = 220 in {
+ defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setgt, VecQ16, HVF16>;
+ defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setogt, VecQ16, HVF16>;
+ defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setgt, VecQ32, HVF32>;
+ defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setogt, VecQ32, HVF32>;
+ }
+ def: OpR_RR_pat<V6_vfmin_hf, pf2<fminnum>, VecF16, HVF16>;
+ def: OpR_RR_pat<V6_vfmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
+ def: OpR_RR_pat<V6_vfmin_sf, pf2<fminnum>, VecF32, HVF32>;
+ def: OpR_RR_pat<V6_vfmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
+}
+
let Predicates = [UseHVX] in {
// For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
// V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
@@ -551,6 +736,12 @@ let Predicates = [UseHVX] in {
def: HvxSel_pat<PS_wselect, HWI32>;
}
+def V2Q: OutPatFrag<(ops node:$Vs), (V6_vandvrt $Vs, (A2_tfrsi -1))>;
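+// V2Q converts an HVX vector back into a predicate register (the inverse
+// of Q2V).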
+
+let Predicates = [UseHVX] in
+ def: Pat<(select I1:$Pu, VecI1:$Qs, VecI1:$Qt),
+ (V2Q (PS_vselect $Pu, (Q2V $Qs), (Q2V $Qt)))>;
+
let Predicates = [UseHVX] in {
def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>;
def: Pat<(VecQ16 (qtrue)), (PS_qtrue)>;
@@ -623,3 +814,63 @@ let Predicates = [UseHVX] in {
def: AccRRR_pat<V6_vgtuw_or, Or, setugt, HQ32, HVI32, HVI32>;
def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>;
}
+
+let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
+ def: OpR_RR_pat<V6_veqh, seteq, VecQ16, HVF16>;
+ def: OpR_RR_pat<V6_veqh, setoeq, VecQ16, HVF16>;
+ def: OpR_RR_pat<V6_veqh, setueq, VecQ16, HVF16>;
+ def: OpR_RR_pat<V6_vgthf, setgt, VecQ16, HVF16>;
+ def: OpR_RR_pat<V6_vgthf, setogt, VecQ16, HVF16>;
+ def: OpR_RR_pat<V6_vgthf, setugt, VecQ16, HVF16>;
+
+ def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVF32>;
+ def: OpR_RR_pat<V6_veqw, setoeq, VecQ32, HVF32>;
+ def: OpR_RR_pat<V6_veqw, setueq, VecQ32, HVF32>;
+ def: OpR_RR_pat<V6_vgtsf, setgt, VecQ32, HVF32>;
+ def: OpR_RR_pat<V6_vgtsf, setogt, VecQ32, HVF32>;
+ def: OpR_RR_pat<V6_vgtsf, setugt, VecQ32, HVF32>;
+
+ def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_and, And, setoeq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_or, Or, setoeq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_xor, Xor, setoeq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_and, And, setueq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_or, Or, setueq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_veqh_xor, Xor, setueq, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_and, And, setgt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_or, Or, setgt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_xor, Xor, setgt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_and, And, setogt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_or, Or, setogt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_xor, Xor, setogt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_and, And, setugt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_or, Or, setugt, HQ16, HVF16, HVF16>;
+ def: AccRRR_pat<V6_vgthf_xor, Xor, setugt, HQ16, HVF16, HVF16>;
+
+ def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_and, And, setoeq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_or, Or, setoeq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_xor, Xor, setoeq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_and, And, setueq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_or, Or, setueq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_veqw_xor, Xor, setueq, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_and, And, setgt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_or, Or, setgt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_xor, Xor, setgt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_and, And, setogt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_or, Or, setogt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_xor, Xor, setogt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_and, And, setugt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_or, Or, setugt, HQ32, HVF32, HVF32>;
+ def: AccRRR_pat<V6_vgtsf_xor, Xor, setugt, HQ32, HVF32, HVF32>;
+
+ def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)),
+ (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>;
+
+ def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)),
+ (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
index 4cd45ecbe1a1..f927f9b9e7c3 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsV65.td
@@ -7,28 +7,31 @@
//===----------------------------------------------------------------------===//
multiclass vgathermh<RegisterClass RC> {
- let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
- (ins IntRegs:$_dst_, IntRegs:$Rt,
- ModRegs:$Mu, RC:$Vv),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ IntRegs:$Rt, ModRegs:$Mu, RC:$Vv),
".error \"should not emit\" ",
[]>;
}
multiclass vgathermw<RegisterClass RC> {
- let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = WordAccess in
def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
- (ins IntRegs:$_dst_, IntRegs:$Rt,
- ModRegs:$Mu, RC:$Vv),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ IntRegs:$Rt, ModRegs:$Mu, RC:$Vv),
".error \"should not emit\" ",
[]>;
}
multiclass vgathermhw<RegisterClass RC> {
- let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
- (ins IntRegs:$_dst_, IntRegs:$Rt,
- ModRegs:$Mu, RC:$Vv),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ IntRegs:$Rt, ModRegs:$Mu, RC:$Vv),
".error \"should not emit\" ",
[]>;
}
@@ -38,28 +41,34 @@ defm V6_vgathermw_pseudo : vgathermw<HvxVR>;
defm V6_vgathermhw_pseudo : vgathermhw<HvxWR>;
multiclass vgathermhq<RegisterClass RC1, RegisterClass RC2> {
- let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
- (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt,
- ModRegs:$Mu, RC1:$Vv),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ RC2:$Vq, IntRegs:$Rt, ModRegs:$Mu,
+ RC1:$Vv),
".error \"should not emit\" ",
[]>;
}
multiclass vgathermwq<RegisterClass RC1, RegisterClass RC2> {
- let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = WordAccess in
def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
- (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt,
- ModRegs:$Mu, RC1:$Vv),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ RC2:$Vq, IntRegs:$Rt, ModRegs:$Mu,
+ RC1:$Vv),
".error \"should not emit\" ",
[]>;
}
multiclass vgathermhwq<RegisterClass RC1, RegisterClass RC2> {
- let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in
+ let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1,
+ mayStore = 1, addrMode = BaseImmOffset, accessSize = HalfWordAccess in
def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ),
- (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt,
- ModRegs:$Mu, RC1:$Vv),
+ (ins IntRegs:$_dst_, s4_0Imm:$Ii,
+ RC2:$Vq, IntRegs:$Rt, ModRegs:$Mu,
+ RC1:$Vv),
".error \"should not emit\" ",
[]>;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 8b7138d3c809..4c387c8ba638 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -479,6 +479,10 @@ def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v32i16, v64i16, v32i16]>;
def VecI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v16i32, v32i32, v16i32]>;
+def VecF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v32f16, v64f16, v32f16]>;
+def VecF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v16f32, v32f32, v16f32]>;
def VecPI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v128i8, v256i8, v128i8]>;
@@ -486,6 +490,10 @@ def VecPI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v64i16, v128i16, v64i16]>;
def VecPI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v32i32, v64i32, v32i32]>;
+def VecPF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v64f16, v128f16, v64f16]>;
+def VecPF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v32f32, v64f32, v32f32]>;
def VecQ8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
[v64i1, v128i1, v64i1]>;
@@ -496,13 +504,13 @@ def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
// HVX register classes
-def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512,
+def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecF16, VecF32], 512,
(add (sequence "V%u", 0, 31), VTMP)> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
[RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
}
-def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024,
+def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32, VecPF16, VecPF32], 1024,
(add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> {
let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
[RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>;
diff --git a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 9a0f57fce97d..ada78ca70559 100644
--- a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -224,14 +224,14 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
unsigned NumRegs = MRI->getNumVirtRegs();
BitVector DoubleRegs(NumRegs);
for (unsigned i = 0; i < NumRegs; ++i) {
- unsigned R = Register::index2VirtReg(i);
+ Register R = Register::index2VirtReg(i);
if (MRI->getRegClass(R) == DoubleRC)
DoubleRegs.set(i);
}
BitVector FixedRegs(NumRegs);
for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
- unsigned R = Register::index2VirtReg(x);
+ Register R = Register::index2VirtReg(x);
MachineInstr *DefI = MRI->getVRegDef(R);
// In some cases a register may exist, but never be defined or used.
// It should never appear anywhere, but mark it as "fixed", just to be
@@ -244,7 +244,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
if (FixedRegs[x])
continue;
- unsigned R = Register::index2VirtReg(x);
+ Register R = Register::index2VirtReg(x);
LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~");
USet &Asc = AssocMap[R];
for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
@@ -281,7 +281,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
unsigned NextP = 1;
USet Visited;
for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
- unsigned R = Register::index2VirtReg(x);
+ Register R = Register::index2VirtReg(x);
if (Visited.count(R))
continue;
// Create a new partition for R.
@@ -578,8 +578,7 @@ void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
append_range(WorkQ, *WorkQ[i]);
USet Rs;
- for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
- MachineLoop *L = WorkQ[i];
+ for (MachineLoop *L : WorkQ) {
Rs.clear();
collectIndRegsForLoop(L, Rs);
if (!Rs.empty())
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 08bb4580b585..bdd2a2cfc5fa 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -228,7 +228,9 @@ bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
return false;
// Avoid types like <2 x i32*>.
- if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
+ Type *ScalTy = VecTy->getScalarType();
+ if (!ScalTy->isIntegerTy() &&
+ !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
return false;
// The given type may be something like <17 x i32>, which is not MVT,
// but can be represented as (non-simple) EVT.
@@ -466,28 +468,46 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
return;
}
- if (!hasV60Ops())
- return;
-
- // Set the latency for a copy to zero since we hope that is will get removed.
+ // Set the latency for a copy to zero since we hope that it will get
+ // removed.
if (DstInst->isCopy())
Dep.setLatency(0);
// If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
// the correct latency.
- if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) {
+ // If the def of the COPY/REG_SEQUENCE has multiple uses, set the latency
+ // only if it is the same across all the uses; otherwise fall back to the
+ // default latency.
+ if ((DstInst->isRegSequence() || DstInst->isCopy())) {
Register DReg = DstInst->getOperand(0).getReg();
- MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr();
- unsigned UseIdx = -1;
- for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
- const MachineOperand &MO = DDst->getOperand(OpNum);
- if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
- UseIdx = OpNum;
+ int DLatency = -1;
+ for (const auto &DDep : Dst->Succs) {
+ MachineInstr *DDst = DDep.getSUnit()->getInstr();
+ int UseIdx = -1;
+ for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
+ const MachineOperand &MO = DDst->getOperand(OpNum);
+ if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
+ UseIdx = OpNum;
+ break;
+ }
+ }
+
+ if (UseIdx == -1)
+ continue;
+
+ int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0,
+ *DDst, UseIdx));
+ // Set DLatency for the first time.
+ DLatency = (DLatency == -1) ? Latency : DLatency;
+
+ // For multiple uses, if the Latency is different across uses, reset
+ // DLatency.
+ if (DLatency != Latency) {
+ DLatency = -1;
break;
}
}
- int DLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst,
- 0, *DDst, UseIdx));
+
DLatency = std::max(DLatency, 0);
Dep.setLatency((unsigned)DLatency);
}
@@ -500,8 +520,10 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx,
Dep.setLatency(0);
return;
}
-
- updateLatency(*SrcInst, *DstInst, Dep);
+ int Latency = Dep.getLatency();
+ bool IsArtificial = Dep.isArtificial();
+ Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
+ Dep.setLatency(Latency);
}
void HexagonSubtarget::getPostRAMutations(
@@ -530,21 +552,19 @@ bool HexagonSubtarget::usePredicatedCalls() const {
return EnablePredicatedCalls;
}
-void HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
- MachineInstr &DstInst, SDep &Dep) const {
- if (Dep.isArtificial()) {
- Dep.setLatency(1);
- return;
- }
-
+int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
+ MachineInstr &DstInst, bool IsArtificial,
+ int Latency) const {
+ if (IsArtificial)
+ return 1;
if (!hasV60Ops())
- return;
-
- auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo());
+ return Latency;
+ auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
// BSB scheduling.
if (QII.isHVXVec(SrcInst) || useBSBScheduling())
- Dep.setLatency((Dep.getLatency() + 1) >> 1);
+ Latency = (Latency + 1) >> 1;
+ return Latency;
}
void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
@@ -580,9 +600,9 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
// For some instructions (ex: COPY), we might end up with < 0 latency
// as they don't have any Itinerary class associated with them.
Latency = std::max(Latency, 0);
-
+ bool IsArtificial = I.isArtificial();
+ Latency = updateLatency(*SrcI, *DstI, IsArtificial, Latency);
I.setLatency(Latency);
- updateLatency(*SrcI, *DstI, I);
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index e4f375440be1..db682676cf12 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -325,8 +325,8 @@ public:
private:
// Helper function responsible for increasing the latency only.
- void updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, SDep &Dep)
- const;
+ int updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst,
+ bool IsArtificial, int Latency) const;
void restoreLatency(SUnit *Src, SUnit *Dst) const;
void changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat) const;
bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index fcf829b522cc..c6703bb8a62a 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -139,6 +139,7 @@ namespace llvm {
void initializeHexagonBitSimplifyPass(PassRegistry&);
void initializeHexagonConstExtendersPass(PassRegistry&);
void initializeHexagonConstPropagationPass(PassRegistry&);
+ void initializeHexagonCopyToCombinePass(PassRegistry&);
void initializeHexagonEarlyIfConversionPass(PassRegistry&);
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonGenMuxPass(PassRegistry&);
@@ -199,6 +200,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
initializeHexagonBitSimplifyPass(PR);
initializeHexagonConstExtendersPass(PR);
initializeHexagonConstPropagationPass(PR);
+ initializeHexagonCopyToCombinePass(PR);
initializeHexagonEarlyIfConversionPass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonHardwareLoopsPass(PR);
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 85ec0cdcd8f0..e9b658d18175 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -886,7 +886,8 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
// Create a dot new machine instruction to see if resources can be
// allocated. If not, bail out now.
- int NewOpcode = HII->getDotNewOp(MI);
+ int NewOpcode = (RC != &Hexagon::PredRegsRegClass) ? HII->getDotNewOp(MI) :
+ HII->getDotNewPredOp(MI, MBPI);
const MCInstrDesc &D = HII->get(NewOpcode);
MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI);
@@ -1107,6 +1108,11 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
HII.isHVXMemWithAIndirect(MI, MJ))
return true;
+ // Don't allow a store and an instruction that must be in slot0 and
+ // doesn't allow a slot1 instruction.
+ if (MI.mayStore() && HII.isRestrictNoSlot1Store(MJ) && HII.isPureSlot0(MJ))
+ return true;
+
// An inline asm cannot be together with a branch, because we may not be
// able to remove the asm out after packetizing (i.e. if the asm must be
// moved past the bundle). Similarly, two asms cannot be together to avoid
@@ -1526,6 +1532,13 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
bool IsVecJ = HII->isHVXVec(J);
bool IsVecI = HII->isHVXVec(I);
+ // Don't reorder the loads if there is an order dependence. This would
+ // occur if the first instruction must go in slot0.
+ if (LoadJ && LoadI && HII->isPureSlot0(J)) {
+ FoundSequentialDependence = true;
+ break;
+ }
+
if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65Ops() &&
((LoadJ && StoreI && !NVStoreI) ||
(StoreJ && LoadI && !NVStoreJ)) &&
@@ -1696,9 +1709,12 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
MachineBasicBlock::iterator MII = MI.getIterator();
MachineBasicBlock *MBB = MI.getParent();
- if (CurrentPacketMIs.empty())
+ if (CurrentPacketMIs.empty()) {
PacketStalls = false;
+ PacketStallCycles = 0;
+ }
PacketStalls |= producesStall(MI);
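+ // Track the worst stall (in cycles) among the instructions already in the
+ // packet; producesStall() tolerates later stalls that are no worse.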
+ PacketStallCycles = std::max(PacketStallCycles, calcStall(MI));
if (MI.isImplicitDef()) {
// Add to the packet to allow subsequent instructions to be checked
@@ -1818,14 +1834,6 @@ bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
if (Minimal)
return false;
- // Constrainst for not packetizing this MI with existing instructions in a
- // packet.
- // MI is a store instruction.
- // CurrentPacketMIs has a SLOT0 only instruction with constraint
- // A_RESTRICT_NOSLOT1_STORE/isRestrictNoSlot1Store.
- if (MI.mayStore() && isPureSlot0InsnWithNoSlot1Store(MI))
- return false;
-
if (producesStall(MI))
return false;
@@ -1865,25 +1873,8 @@ bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
return true;
}
-bool HexagonPacketizerList::isPureSlot0InsnWithNoSlot1Store(
- const MachineInstr &MI) {
- bool noSlot1Store = false;
- bool isSlot0Only = false;
- for (auto J : CurrentPacketMIs) {
- noSlot1Store |= HII->isRestrictNoSlot1Store(*J);
- isSlot0Only |= HII->isPureSlot0(*J);
- }
-
- return (noSlot1Store && isSlot0Only);
-}
-
// V60 forward scheduling.
-bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
- // If the packet already stalls, then ignore the stall from a subsequent
- // instruction in the same packet.
- if (PacketStalls)
- return false;
-
+unsigned int HexagonPacketizerList::calcStall(const MachineInstr &I) {
// Check whether the previous packet is in a different loop. If this is the
// case, there is little point in trying to avoid a stall because that would
// favor the rare case (loop entry) over the common case (loop iteration).
@@ -1895,10 +1886,12 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
auto *OldBB = OldPacketMIs.front()->getParent();
auto *ThisBB = I.getParent();
if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
- return false;
+ return 0;
}
SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)];
+ if (!SUI)
+ return 0;
// If the latency is 0 and there is a data dependence between this
// instruction and any instruction in the current packet, we disregard any
@@ -1927,7 +1920,7 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
if (Pred.getSUnit() == SUJ)
if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) ||
HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I))
- return false;
+ return 0;
}
// Check if the latency is greater than one between this instruction and any
@@ -1936,10 +1929,20 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
SUnit *SUJ = MIToSUnit[J];
for (auto &Pred : SUI->Preds)
if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1)
- return true;
+ return Pred.getLatency();
}
- return false;
+ return 0;
+}
+
+bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
+ unsigned int Latency = calcStall(I);
+ if (Latency == 0)
+ return false;
+ // Ignore the stall unless it is greater than the worst stall already in
+ // the packet.
+ if (PacketStalls)
+ return Latency > PacketStallCycles;
+ return true;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 27a47220570a..6a709e566f86 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -56,6 +56,9 @@ class HexagonPacketizerList : public VLIWPacketizerList {
// Set to true if the packet contains an instruction that stalls with an
// instruction from the previous packet.
bool PacketStalls = false;
+ // Set to the number of stall cycles the packet incurs because of a
+ // dependence on an instruction in the previous packet.
+ unsigned int PacketStallCycles = 0;
// Set to true if the packet has a duplex pair of sub-instructions.
bool PacketHasDuplex = false;
@@ -156,7 +159,7 @@ protected:
bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
bool hasDualStoreDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
- bool isPureSlot0InsnWithNoSlot1Store(const MachineInstr &MI);
+ unsigned int calcStall(const MachineInstr &MI);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 21386a91c7b3..6aca8d807872 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -443,7 +443,7 @@ auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
// we don't need to do pointer casts.
auto *PtrTy = cast<PointerType>(Ptr->getType());
if (!PtrTy->isOpaque()) {
- Type *ElemTy = PtrTy->getElementType();
+ Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
int ElemSize = HVC.getAllocSizeOf(ElemTy);
if (Adjust % ElemSize == 0 && Adjust != 0) {
Value *Tmp0 =
@@ -718,7 +718,7 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
// Maximum alignment present in the whole address group.
const AddrInfo &WithMaxAlign =
- getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
+ getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
Align MaxGiven = WithMaxAlign.HaveAlign;
// Minimum alignment present in the move address group.
@@ -1181,12 +1181,15 @@ auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask,
int ToCount = (FromCount * FromSize) / ToSize;
assert((FromCount * FromSize) % ToSize == 0);
+ auto *FromITy = IntegerType::get(F.getContext(), FromSize * 8);
+ auto *ToITy = IntegerType::get(F.getContext(), ToSize * 8);
+
// Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
// -> trunc to <M x i1>.
Value *Ext = Builder.CreateSExt(
- Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false));
+ Mask, VectorType::get(FromITy, FromCount, /*Scalable*/ false));
Value *Cast = Builder.CreateBitCast(
- Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false));
+ Ext, VectorType::get(ToITy, ToCount, /*Scalable*/ false));
return Builder.CreateTrunc(
Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false));
}
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index f973862a0c9b..94b878e21f4d 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -659,8 +659,7 @@ void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
delete D;
}
LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
- LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
- ++i) { dbgs() << *Dependences[i] << "\n"; });
+ LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);
}
Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 96c2965296ca..8a866cfe9161 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
+
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
@@ -65,7 +66,8 @@ void HexagonMCChecker::init() {
void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg,
bool &isTrue) {
- if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) &&
+ HexagonMCInstrInfo::isPredReg(RI, R)) {
// Note an used predicate register.
PredReg = R;
isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
@@ -123,7 +125,7 @@ void HexagonMCChecker::init(MCInst const &MCI) {
// same packet with an instruction that modifies is explicitly. Deal
// with such situations individually.
SoftDefs.insert(R);
- else if (isPredicateRegister(R) &&
+ else if (HexagonMCInstrInfo::isPredReg(RI, R) &&
HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
// Include implicit late predicates.
LatePreds.insert(R);
@@ -167,7 +169,7 @@ void HexagonMCChecker::init(MCInst const &MCI) {
// side-effect, then note as a soft definition.
SoftDefs.insert(*SRI);
else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) &&
- isPredicateRegister(*SRI))
+ HexagonMCInstrInfo::isPredReg(RI, *SRI))
// Some insns produce predicates too late to be used in the same packet.
LatePreds.insert(*SRI);
else if (i == 0 && HexagonMCInstrInfo::getType(MCII, MCI) ==
@@ -193,7 +195,7 @@ void HexagonMCChecker::init(MCInst const &MCI) {
if (MCI.getOperand(i).isReg()) {
unsigned P = MCI.getOperand(i).getReg();
- if (isPredicateRegister(P))
+ if (HexagonMCInstrInfo::isPredReg(RI, P))
NewPreds.insert(P);
}
}
@@ -202,7 +204,7 @@ HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII,
MCSubtargetInfo const &STI, MCInst &mcb,
MCRegisterInfo const &ri, bool ReportErrors)
: Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI),
- ReportErrors(ReportErrors), ReversePairs() {
+ ReportErrors(ReportErrors) {
init();
}
@@ -210,8 +212,7 @@ HexagonMCChecker::HexagonMCChecker(HexagonMCChecker const &Other,
MCSubtargetInfo const &STI,
bool CopyReportErrors)
: Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII),
- STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false),
- ReversePairs() {
+ STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) {
init();
}
@@ -233,9 +234,10 @@ bool HexagonMCChecker::check(bool FullCheck) {
bool chkHWLoop = checkHWLoop();
bool chkValidTmpDst = FullCheck ? checkValidTmpDst() : true;
bool chkLegalVecRegPair = checkLegalVecRegPair();
+ bool ChkHVXAccum = checkHVXAccum();
bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl &&
chkAXOK && chkCofMax1 && chkHWLoop && chkValidTmpDst &&
- chkLegalVecRegPair;
+ chkLegalVecRegPair && ChkHVXAccum;
return chk;
}
@@ -274,20 +276,27 @@ static bool isDuplexAGroup(unsigned Opcode) {
}
static bool isNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) {
- unsigned Result = 0;
+ if (HexagonMCInstrInfo::isFloat(MCII, ID))
+ return true;
unsigned Type = HexagonMCInstrInfo::getType(MCII, ID);
- if (Type == HexagonII::TypeDUPLEX) {
- unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode();
- unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode();
- Result += !isDuplexAGroup(subInst0Opcode);
- Result += !isDuplexAGroup(subInst1Opcode);
- } else
- Result +=
- Type != HexagonII::TypeALU32_2op && Type != HexagonII::TypeALU32_3op &&
- Type != HexagonII::TypeALU32_ADDI && Type != HexagonII::TypeS_2op &&
- Type != HexagonII::TypeS_3op &&
- (Type != HexagonII::TypeALU64 || HexagonMCInstrInfo::isFloat(MCII, ID));
- return Result != 0;
+ switch (Type) {
+ case HexagonII::TypeALU32_2op:
+ case HexagonII::TypeALU32_3op:
+ case HexagonII::TypeALU32_ADDI:
+ case HexagonII::TypeS_2op:
+ case HexagonII::TypeS_3op:
+ case HexagonII::TypeEXTENDER:
+ case HexagonII::TypeM:
+ case HexagonII::TypeALU64:
+ return false;
+ case HexagonII::TypeSUBINSN: {
+ return !isDuplexAGroup(ID.getOpcode());
+ }
+ case HexagonII::TypeDUPLEX:
+ llvm_unreachable("unexpected duplex instruction");
+ default:
+ return true;
+ }
}
bool HexagonMCChecker::checkAXOK() {
@@ -315,8 +324,7 @@ bool HexagonMCChecker::checkAXOK() {
void HexagonMCChecker::reportBranchErrors() {
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
- MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I);
- if (Desc.isBranch() || Desc.isCall() || Desc.isReturn())
+ if (HexagonMCInstrInfo::IsABranchingInst(MCII, STI, I))
reportNote(I.getLoc(), "Branching instruction");
}
}
@@ -326,8 +334,7 @@ bool HexagonMCChecker::checkHWLoop() {
!HexagonMCInstrInfo::isOuterLoop(MCB))
return true;
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
- MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I);
- if (Desc.isBranch() || Desc.isCall() || Desc.isReturn()) {
+ if (HexagonMCInstrInfo::IsABranchingInst(MCII, STI, I)) {
reportError(MCB.getLoc(),
"Branches cannot be in a packet with hardware loops");
reportBranchErrors();
@@ -340,8 +347,7 @@ bool HexagonMCChecker::checkHWLoop() {
bool HexagonMCChecker::checkCOFMax1() {
SmallVector<MCInst const *, 2> BranchLocations;
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
- MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I);
- if (Desc.isBranch() || Desc.isCall() || Desc.isReturn())
+ if (HexagonMCInstrInfo::IsABranchingInst(MCII, STI, I))
BranchLocations.push_back(&I);
}
for (unsigned J = 0, N = BranchLocations.size(); J < N; ++J) {
@@ -373,18 +379,8 @@ bool HexagonMCChecker::checkCOFMax1() {
}
bool HexagonMCChecker::checkSlots() {
- unsigned slotsUsed = 0;
- for (auto HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
- MCInst const &MCI = *HMI.getInst();
- if (HexagonMCInstrInfo::isImmext(MCI))
- continue;
- if (HexagonMCInstrInfo::isDuplex(MCII, MCI))
- slotsUsed += 2;
- else
- ++slotsUsed;
- }
-
- if (slotsUsed > HEXAGON_PACKET_SIZE) {
+ if (HexagonMCInstrInfo::slotsConsumed(MCII, STI, MCB) >
+ HexagonMCInstrInfo::packetSizeSlots(STI)) {
reportError("invalid instruction packet: out of slots");
return false;
}
@@ -424,81 +420,109 @@ bool HexagonMCChecker::checkPredicates() {
// Check legal use of new values.
bool HexagonMCChecker::checkNewValues() {
- for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
- if (!HexagonMCInstrInfo::isNewValue(MCII, I))
+ for (auto const &ConsumerInst :
+ HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
+ if (!HexagonMCInstrInfo::isNewValue(MCII, ConsumerInst))
continue;
- auto Consumer = HexagonMCInstrInfo::predicateInfo(MCII, I);
- bool Branch = HexagonMCInstrInfo::getDesc(MCII, I).isBranch();
- MCOperand const &Op = HexagonMCInstrInfo::getNewValueOperand(MCII, I);
+
+ const HexagonMCInstrInfo::PredicateInfo ConsumerPredInfo =
+ HexagonMCInstrInfo::predicateInfo(MCII, ConsumerInst);
+
+ bool Branch = HexagonMCInstrInfo::getDesc(MCII, ConsumerInst).isBranch();
+ MCOperand const &Op =
+ HexagonMCInstrInfo::getNewValueOperand(MCII, ConsumerInst);
assert(Op.isReg());
- auto Producer = registerProducer(Op.getReg(), Consumer);
- if (std::get<0>(Producer) == nullptr) {
- reportError(I.getLoc(), "New value register consumer has no producer");
+
+ auto Producer = registerProducer(Op.getReg(), ConsumerPredInfo);
+ const MCInst *const ProducerInst = std::get<0>(Producer);
+ const HexagonMCInstrInfo::PredicateInfo ProducerPredInfo =
+ std::get<2>(Producer);
+
+ if (ProducerInst == nullptr) {
+ reportError(ConsumerInst.getLoc(),
+ "New value register consumer has no producer");
return false;
}
if (!RelaxNVChecks) {
// Checks that statically prove correct new value consumption
- if (std::get<2>(Producer).isPredicated() &&
- (!Consumer.isPredicated() ||
- llvm::HexagonMCInstrInfo::getType(MCII, I) == HexagonII::TypeNCJ)) {
+ if (ProducerPredInfo.isPredicated() &&
+ (!ConsumerPredInfo.isPredicated() ||
+ llvm::HexagonMCInstrInfo::getType(MCII, ConsumerInst) ==
+ HexagonII::TypeNCJ)) {
reportNote(
- std::get<0>(Producer)->getLoc(),
+ ProducerInst->getLoc(),
"Register producer is predicated and consumer is unconditional");
- reportError(I.getLoc(),
+ reportError(ConsumerInst.getLoc(),
"Instruction does not have a valid new register producer");
return false;
}
- if (std::get<2>(Producer).Register != Hexagon::NoRegister &&
- std::get<2>(Producer).Register != Consumer.Register) {
- reportNote(std::get<0>(Producer)->getLoc(),
+ if (ProducerPredInfo.Register != Hexagon::NoRegister &&
+ ProducerPredInfo.Register != ConsumerPredInfo.Register) {
+ reportNote(ProducerInst->getLoc(),
"Register producer does not use the same predicate "
"register as the consumer");
- reportError(I.getLoc(),
+ reportError(ConsumerInst.getLoc(),
"Instruction does not have a valid new register producer");
return false;
}
}
- if (std::get<2>(Producer).Register == Consumer.Register &&
- Consumer.PredicatedTrue != std::get<2>(Producer).PredicatedTrue) {
+ if (ProducerPredInfo.Register == ConsumerPredInfo.Register &&
+ ConsumerPredInfo.PredicatedTrue != ProducerPredInfo.PredicatedTrue) {
reportNote(
- std::get<0>(Producer)->getLoc(),
+ ProducerInst->getLoc(),
"Register producer has the opposite predicate sense as consumer");
- reportError(I.getLoc(),
+ reportError(ConsumerInst.getLoc(),
"Instruction does not have a valid new register producer");
return false;
}
- MCInstrDesc const &Desc =
- HexagonMCInstrInfo::getDesc(MCII, *std::get<0>(Producer));
- if (Desc.OpInfo[std::get<1>(Producer)].RegClass ==
+
+ MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, *ProducerInst);
+ const unsigned ProducerOpIndex = std::get<1>(Producer);
+
+ if (Desc.OpInfo[ProducerOpIndex].RegClass ==
Hexagon::DoubleRegsRegClassID) {
- reportNote(std::get<0>(Producer)->getLoc(),
+ reportNote(ProducerInst->getLoc(),
"Double registers cannot be new-value producers");
- reportError(I.getLoc(),
+ reportError(ConsumerInst.getLoc(),
"Instruction does not have a valid new register producer");
return false;
}
- if ((Desc.mayLoad() && std::get<1>(Producer) == 1) ||
- (Desc.mayStore() && std::get<1>(Producer) == 0)) {
- unsigned Mode =
- HexagonMCInstrInfo::getAddrMode(MCII, *std::get<0>(Producer));
+
+  // The ProducerOpIsMemIndex logic checks the index of the producer
+  // register operand. Z-reg load instructions have an implicit operand
+  // that is not encoded, so the producer does not appear as def index 1;
+  // it appears at def index 0.
+ const unsigned ProducerOpSearchIndex =
+ (HexagonMCInstrInfo::getType(MCII, *ProducerInst) ==
+ HexagonII::TypeCVI_ZW)
+ ? 0
+ : 1;
+
+ const bool ProducerOpIsMemIndex =
+ ((Desc.mayLoad() && ProducerOpIndex == ProducerOpSearchIndex) ||
+ (Desc.mayStore() && ProducerOpIndex == 0));
+
+ if (ProducerOpIsMemIndex) {
+ unsigned Mode = HexagonMCInstrInfo::getAddrMode(MCII, *ProducerInst);
+
StringRef ModeError;
if (Mode == HexagonII::AbsoluteSet)
ModeError = "Absolute-set";
if (Mode == HexagonII::PostInc)
ModeError = "Auto-increment";
if (!ModeError.empty()) {
- reportNote(std::get<0>(Producer)->getLoc(),
+ reportNote(ProducerInst->getLoc(),
ModeError + " registers cannot be a new-value "
"producer");
- reportError(I.getLoc(),
+ reportError(ConsumerInst.getLoc(),
"Instruction does not have a valid new register producer");
return false;
}
}
- if (Branch && HexagonMCInstrInfo::isFloat(MCII, *std::get<0>(Producer))) {
- reportNote(std::get<0>(Producer)->getLoc(),
+ if (Branch && HexagonMCInstrInfo::isFloat(MCII, *ProducerInst)) {
+ reportNote(ProducerInst->getLoc(),
"FPU instructions cannot be new-value producers for jumps");
- reportError(I.getLoc(),
+ reportError(ConsumerInst.getLoc(),
"Instruction does not have a valid new register producer");
return false;
}
@@ -541,9 +565,11 @@ HexagonMCChecker::registerProducer(
unsigned Register, HexagonMCInstrInfo::PredicateInfo ConsumerPredicate) {
std::tuple<MCInst const *, unsigned, HexagonMCInstrInfo::PredicateInfo>
WrongSense;
+
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I);
auto ProducerPredicate = HexagonMCInstrInfo::predicateInfo(MCII, I);
+
for (unsigned J = 0, N = Desc.getNumDefs(); J < N; ++J)
for (auto K = MCRegAliasIterator(I.getOperand(J).getReg(), &RI, true);
K.isValid(); ++K)
@@ -568,9 +594,15 @@ void HexagonMCChecker::checkRegisterCurDefs() {
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
if (HexagonMCInstrInfo::isCVINew(MCII, I) &&
HexagonMCInstrInfo::getDesc(MCII, I).mayLoad()) {
- unsigned Register = I.getOperand(0).getReg();
- if (!registerUsed(Register))
- reportWarning("Register `" + Twine(RI.getName(Register)) +
+ const unsigned RegDef = I.getOperand(0).getReg();
+
+ bool HasRegDefUse = false;
+ for (MCRegAliasIterator Alias(RegDef, &RI, true); Alias.isValid();
+ ++Alias)
+ HasRegDefUse = HasRegDefUse || registerUsed(*Alias);
+
+ if (!HasRegDefUse)
+ reportWarning("Register `" + Twine(RI.getName(RegDef)) +
"' used with `.cur' "
"but not used in the same packet");
}
@@ -599,7 +631,7 @@ bool HexagonMCChecker::checkRegisters() {
reportErrorRegisters(BadR);
return false;
}
- if (!isPredicateRegister(R) && Defs[R].size() > 1) {
+ if (!HexagonMCInstrInfo::isPredReg(RI, R) && Defs[R].size() > 1) {
// Check for multiple register definitions.
PredSet &PM = Defs[R];
@@ -784,3 +816,22 @@ bool HexagonMCChecker::checkLegalVecRegPair() {
}
return true;
}
+
+// A register defined as Vd.tmp cannot also be accumulated in the same packet.
+bool HexagonMCChecker::checkHVXAccum()
+{
+ for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
+ bool IsTarget =
+ HexagonMCInstrInfo::isAccumulator(MCII, I) && I.getOperand(0).isReg();
+ if (!IsTarget)
+ continue;
+ unsigned int R = I.getOperand(0).getReg();
+ TmpDefsIterator It = TmpDefs.find(R);
+ if (It != TmpDefs.end()) {
+ reportError("register `" + Twine(RI.getName(R)) + ".tmp" +
+ "' is accumulated in this packet");
+ return false;
+ }
+ }
+ return true;
+}
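
The new check collects, elsewhere in the checker, the set of registers written through their .tmp form (TmpDefs) and then rejects any accumulating instruction whose destination register is in that set. A minimal standalone sketch of the same rule, folding both steps into one function and using simplified assumed types rather than the LLVM MC classes:

#include <set>
#include <string>
#include <vector>

struct Inst {
  std::string Dest;
  bool DefinesTmp = false;    // models "v0.tmp = ..."
  bool IsAccumulator = false; // models "v0 += ..."
};

static bool checkHVXAccum(const std::vector<Inst> &Packet) {
  // First pass: remember every register written through its .tmp form.
  std::set<std::string> TmpDefs;
  for (const Inst &I : Packet)
    if (I.DefinesTmp)
      TmpDefs.insert(I.Dest);
  // Second pass: reject accumulators whose destination is one of those.
  for (const Inst &I : Packet)
    if (I.IsAccumulator && TmpDefs.count(I.Dest))
      return false; // "<Dest>.tmp is accumulated in this packet"
  return true;
}

int main() {
  std::vector<Inst> Packet = {{"v0", true, false}, {"v0", false, true}};
  return checkHVXAccum(Packet) ? 1 : 0; // the packet above is rejected
}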
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index dbd3d8ae45e6..b83931eb88ac 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -81,6 +81,10 @@ class HexagonMCChecker {
void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
bool registerUsed(unsigned Register);
+
+ /// \return a tuple of: pointer to the producer instruction or nullptr if
+ /// none was found, the operand index, and the PredicateInfo for the
+ /// producer.
std::tuple<MCInst const *, unsigned, HexagonMCInstrInfo::PredicateInfo>
registerProducer(unsigned Register,
HexagonMCInstrInfo::PredicateInfo Predicated);
@@ -100,14 +104,10 @@ class HexagonMCChecker {
bool checkCOFMax1();
bool checkLegalVecRegPair();
bool checkValidTmpDst();
+ bool checkHVXAccum();
static void compoundRegisterMap(unsigned &);
- bool isPredicateRegister(unsigned R) const {
- return (Hexagon::P0 == R || Hexagon::P1 == R || Hexagon::P2 == R ||
- Hexagon::P3 == R);
- }
-
bool isLoopRegister(unsigned R) const {
return (Hexagon::SA0 == R || Hexagon::LC0 == R || Hexagon::SA1 == R ||
Hexagon::LC1 == R);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 33b2e9a9e302..f8ac35aed7c0 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -712,7 +712,6 @@ unsigned
HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const {
-#ifndef NDEBUG
size_t OperandNumber = ~0U;
for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i)
if (&MI.getOperand(i) == &MO) {
@@ -720,7 +719,6 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
break;
}
assert((OperandNumber != ~0U) && "Operand not found");
-#endif
if (HexagonMCInstrInfo::isNewValue(MCII, MI) &&
&MO == &HexagonMCInstrInfo::getNewValueOperand(MCII, MI)) {
@@ -777,9 +775,13 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
assert(!MO.isImm());
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- if (HexagonMCInstrInfo::isSubInstruction(MI) ||
- HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCJ)
+ switch (HexagonMCInstrInfo::getDesc(MCII, MI).OpInfo[OperandNumber].RegClass) {
+ case GeneralSubRegsRegClassID:
+ case GeneralDoubleLow8RegsRegClassID:
return HexagonMCInstrInfo::getDuplexRegisterNumbering(Reg);
+ default:
+ break;
+ }
return MCT.getRegisterInfo()->getEncodingValue(Reg);
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index e7ade7834a9f..3deef95df324 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -365,8 +365,10 @@ static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
MCI.begin() + HexagonMCInstrInfo::bundleInstructionsOffset;
B != MCI.end(); ++B) {
MCInst const *Inst = B->getInst();
- if (JumpInst == Inst)
+ if (JumpInst == Inst) {
+ BExtended = false;
continue;
+ }
if (HexagonMCInstrInfo::isImmext(*Inst)) {
BExtended = true;
continue;
@@ -405,24 +407,27 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co
if (MCI.size() < 2)
return;
- bool StartedValid = llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI);
-
// Create a vector, needed to keep the order of jump instructions.
MCInst CheckList(MCI);
+ // Keep the last known good bundle around in case the shuffle fails.
+ MCInst LastValidBundle(MCI);
+
+ bool PreviouslyValid = llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI);
+
  // Look for compounds until none are found; only update the bundle when
// a compound is found.
while (lookForCompound(MCII, Context, CheckList)) {
- // Keep the original bundle around in case the shuffle fails.
- MCInst OriginalBundle(MCI);
-
// Need to update the bundle.
MCI = CheckList;
- if (StartedValid &&
- !llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI)) {
+ const bool IsValid = llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI);
+ if (PreviouslyValid && !IsValid) {
LLVM_DEBUG(dbgs() << "Found ERROR\n");
- MCI = OriginalBundle;
+ MCI = LastValidBundle;
+ } else if (IsValid) {
+ LastValidBundle = MCI;
+ PreviouslyValid = true;
}
}
}
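
The reworked loop above keeps the most recent bundle that shuffled successfully instead of only the original, so a compound that later fails to shuffle does not discard earlier compounds that did. A minimal standalone sketch of that keep-last-valid pattern, with assumed stand-in types instead of MCInst:

#include <string>
#include <vector>

// Stand-in for the shuffler's legality check; the real check is far richer.
static bool validate(const std::vector<std::string> &Bundle) {
  return Bundle.size() <= 4;
}

static void applyCandidates(std::vector<std::string> &Bundle,
                            const std::vector<std::string> &Candidates) {
  std::vector<std::string> LastValid = Bundle;
  bool PreviouslyValid = validate(Bundle);
  for (const std::string &C : Candidates) {
    Bundle.push_back(C); // tentative rewrite
    const bool IsValid = validate(Bundle);
    if (PreviouslyValid && !IsValid)
      Bundle = LastValid; // roll back to the last state that validated
    else if (IsValid) {
      LastValid = Bundle; // remember the new good state
      PreviouslyValid = true;
    }
  }
}

int main() {
  std::vector<std::string> Bundle{"i0", "i1", "i2"};
  applyCandidates(Bundle, {"c0", "c1", "c2"});
  return Bundle.size() == 4 ? 0 : 1; // "c1" and "c2" were rolled back
}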
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index e1c95f1cc920..36d6c8c9f84b 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -284,8 +284,6 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
case Hexagon::J2_jumprf:
case Hexagon::J2_jumprtnew:
case Hexagon::J2_jumprfnew:
- case Hexagon::J2_jumprtnewpt:
- case Hexagon::J2_jumprfnewpt:
case Hexagon::PS_jmprett:
case Hexagon::PS_jmpretf:
case Hexagon::PS_jmprettnew:
@@ -303,8 +301,6 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
case Hexagon::L4_return_f:
case Hexagon::L4_return_tnew_pnt:
case Hexagon::L4_return_fnew_pnt:
- case Hexagon::L4_return_tnew_pt:
- case Hexagon::L4_return_fnew_pt:
// [if ([!]p0[.new])] dealloc_return
SrcReg = MCI.getOperand(1).getReg();
if (Hexagon::P0 == SrcReg) {
@@ -699,6 +695,7 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst,
MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
MCInst Result;
+ Result.setLoc(Inst.getLoc());
bool Absolute;
int64_t Value;
switch (Inst.getOpcode()) {
@@ -830,7 +827,6 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Result.setOpcode(Hexagon::SL2_jumpr31_f);
break; // none SUBInst if (!p0) jumpr r31
case Hexagon::J2_jumprfnew:
- case Hexagon::J2_jumprfnewpt:
case Hexagon::PS_jmpretfnewpt:
case Hexagon::PS_jmpretfnew:
Result.setOpcode(Hexagon::SL2_jumpr31_fnew);
@@ -840,7 +836,6 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Result.setOpcode(Hexagon::SL2_jumpr31_t);
break; // none SUBInst if (p0) jumpr r31
case Hexagon::J2_jumprtnew:
- case Hexagon::J2_jumprtnewpt:
case Hexagon::PS_jmprettnewpt:
case Hexagon::PS_jmprettnew:
Result.setOpcode(Hexagon::SL2_jumpr31_tnew);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 68ccb20f4f15..494b0e6cbac6 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -128,23 +128,28 @@ bool canonicalizePacketImpl(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
bool CheckOk = Check ? Check->check(false) : true;
if (!CheckOk)
return false;
+
+ MCInst OrigMCB = MCB;
+
// Examine the packet and convert pairs of instructions to compound
// instructions when possible.
if (!HexagonDisableCompound)
HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB);
HexagonMCShuffle(Context, false, MCII, STI, MCB);
+ const SmallVector<DuplexCandidate, 8> possibleDuplexes =
+ (STI.getFeatureBits()[Hexagon::FeatureDuplex])
+ ? HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB)
+ : SmallVector<DuplexCandidate, 8>();
+
// Examine the packet and convert pairs of instructions to duplex
// instructions when possible.
- if (STI.getFeatureBits() [Hexagon::FeatureDuplex]) {
- SmallVector<DuplexCandidate, 8> possibleDuplexes;
- possibleDuplexes =
- HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB);
- HexagonMCShuffle(Context, MCII, STI, MCB, possibleDuplexes);
- }
+ HexagonMCShuffle(Context, MCII, STI, MCB, possibleDuplexes);
+
// Examines packet and pad the packet, if needed, when an
// end-loop is in the bundle.
HexagonMCInstrInfo::padEndloop(MCB, Context);
+
// If compounding and duplexing didn't reduce the size below
// 4 or less we have a packet that is too big.
if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
@@ -156,7 +161,9 @@ bool canonicalizePacketImpl(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
CheckOk = Check ? Check->check(true) : true;
if (!CheckOk)
return false;
+
HexagonMCShuffle(Context, true, MCII, STI, MCB);
+
return true;
}
} // namespace
@@ -857,16 +864,16 @@ bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
}
int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
- auto Sentinal = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
+ auto Sentinel = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
<< 8;
if (MCI.size() <= Index)
- return Sentinal;
+ return Sentinel;
MCOperand const &MCO = MCI.getOperand(Index);
if (!MCO.isExpr())
- return Sentinal;
+ return Sentinel;
int64_t Value;
if (!MCO.getExpr()->evaluateAsAbsolute(Value))
- return Sentinal;
+ return Sentinel;
return Value;
}
@@ -915,10 +922,7 @@ void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) {
MCInst Nop;
Nop.setOpcode(Hexagon::A2_nop);
assert(isBundle(MCB));
- while ((HexagonMCInstrInfo::isInnerLoop(MCB) &&
- (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
- ((HexagonMCInstrInfo::isOuterLoop(MCB) &&
- (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
+ while (LoopNeedsPadding(MCB))
MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop)));
}
@@ -1030,3 +1034,19 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer,
return Consumer == Producer;
return 0;
}
+
+bool HexagonMCInstrInfo::LoopNeedsPadding(MCInst const &MCB) {
+ return (
+ (HexagonMCInstrInfo::isInnerLoop(MCB) &&
+ (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
+ ((HexagonMCInstrInfo::isOuterLoop(MCB) &&
+ (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))));
+}
+
+bool HexagonMCInstrInfo::IsABranchingInst(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &I) {
+ assert(!HexagonMCInstrInfo::isBundle(I));
+ MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I);
+ return (Desc.isBranch() || Desc.isCall() || Desc.isReturn());
+}
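
LoopNeedsPadding factors out the endloop rule that padEndloop already applied: an inner-endloop packet must reach a minimum instruction count, and likewise for an outer endloop, with nops making up the difference. A minimal standalone sketch of that rule under assumed minimum sizes (the real HEXAGON_PACKET_INNER_SIZE / HEXAGON_PACKET_OUTER_SIZE values are defined elsewhere):

#include <string>
#include <vector>

// Assumed stand-ins for HEXAGON_PACKET_INNER_SIZE / HEXAGON_PACKET_OUTER_SIZE.
constexpr unsigned kInnerMin = 2;
constexpr unsigned kOuterMin = 3;

struct Packet {
  std::vector<std::string> Insts;
  bool InnerLoop = false;
  bool OuterLoop = false;
};

static bool loopNeedsPadding(const Packet &P) {
  return (P.InnerLoop && P.Insts.size() < kInnerMin) ||
         (P.OuterLoop && P.Insts.size() < kOuterMin);
}

static void padEndloop(Packet &P) {
  while (loopNeedsPadding(P))
    P.Insts.push_back("A2_nop"); // keep appending nops until the minimum holds
}

int main() {
  Packet P;
  P.InnerLoop = true;
  P.Insts.push_back("some_inst");
  padEndloop(P); // P now holds kInnerMin instructions
  return P.Insts.size() == kInnerMin ? 0 : 1;
}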
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 5c56db14798f..f0c4a86fde78 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -65,18 +65,24 @@ public:
namespace HexagonMCInstrInfo {
-size_t const innerLoopOffset = 0;
-int64_t const innerLoopMask = 1 << innerLoopOffset;
+constexpr size_t innerLoopOffset = 0;
+constexpr int64_t innerLoopMask = 1 << innerLoopOffset;
-size_t const outerLoopOffset = 1;
-int64_t const outerLoopMask = 1 << outerLoopOffset;
+constexpr size_t outerLoopOffset = 1;
+constexpr int64_t outerLoopMask = 1 << outerLoopOffset;
// Flag to disable reordering of memory loads/stores; by default loads and
// stores may be re-ordered within a packet.
-size_t const memReorderDisabledOffset = 2;
-int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset;
+constexpr size_t memReorderDisabledOffset = 2;
+constexpr int64_t memReorderDisabledMask = 1 << memReorderDisabledOffset;
-size_t const bundleInstructionsOffset = 1;
+constexpr size_t splitNoMemOrderOffset = 3;
+constexpr int64_t splitNoMemorderMask = 1 << splitNoMemOrderOffset;
+
+constexpr size_t noShuffleOffset = 4;
+constexpr int64_t noShuffleMask = 1 << noShuffleOffset;
+
+constexpr size_t bundleInstructionsOffset = 1;
void addConstant(MCInst &MI, uint64_t Value, MCContext &Context);
void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
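
The offset/mask pairs above name independent bits of the bundle-flags operand, so packet properties are combined by OR-ing masks into a single word and queried by masking them back out. A small plain-C++ illustration of that layout (the constants are restated locally; this is not the LLVM accessor API):

#include <cassert>
#include <cstdint>

int main() {
  constexpr int64_t innerLoopMask = 1 << 0;
  constexpr int64_t memReorderDisabledMask = 1 << 2;
  constexpr int64_t noShuffleMask = 1 << 4;

  int64_t BundleFlags = 0;
  BundleFlags |= innerLoopMask;          // mark the packet as an inner endloop
  BundleFlags |= memReorderDisabledMask; // forbid load/store reordering

  assert((BundleFlags & innerLoopMask) != 0);
  assert((BundleFlags & noShuffleMask) == 0); // shuffling is still allowed
  return 0;
}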
@@ -95,6 +101,8 @@ bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCB,
HexagonMCChecker *Checker,
bool AttemptCompatibility = false);
+bool IsABranchingInst(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst const &I);
// Create a duplex instruction given the two subinsts
MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
@@ -307,6 +315,10 @@ bool mustNotExtend(MCExpr const &Expr);
// Returns true if this instruction requires a slot to execute.
bool requiresSlot(MCSubtargetInfo const &STI, MCInst const &MCI);
+
+// Returns true if \a MCB would require endloop padding.
+bool LoopNeedsPadding(MCInst const &MCB);
+
unsigned packetSize(StringRef CPU);
// Returns the maximum number of slots available in the given
@@ -318,8 +330,7 @@ unsigned packetSizeSlots(MCSubtargetInfo const &STI);
unsigned slotsConsumed(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCInst const &MCI);
-
-// Pad the bundle with nops to satisfy endloop requirements
+// Pad the bundle with nops to satisfy endloop requirements.
void padEndloop(MCInst &MCI, MCContext &Context);
class PredicateInfo {
public:
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index d38b77b42fbc..d96fade71a84 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -81,10 +81,9 @@ void HexagonMCShuffler::copyTo(MCInst &MCB) {
MCB.addOperand(MCOperand::createImm(BundleFlags));
MCB.setLoc(Loc);
// Copy the results into the bundle.
- for (HexagonShuffler::iterator I = begin(); I != end(); ++I) {
-
- MCInst const &MI = I->getDesc();
- MCInst const *Extender = I->getExtender();
+ for (auto &I : *this) {
+ MCInst const &MI = I.getDesc();
+ MCInst const *Extender = I.getExtender();
if (Extender)
MCB.addOperand(MCOperand::createInst(Extender));
MCB.addOperand(MCOperand::createInst(&MI));
@@ -101,10 +100,10 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) {
return false;
}
-bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal,
+bool llvm::HexagonMCShuffle(MCContext &Context, bool ReportErrors,
MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCInst &MCB) {
- HexagonMCShuffler MCS(Context, Fatal, MCII, STI, MCB);
+ HexagonMCShuffler MCS(Context, ReportErrors, MCII, STI, MCB);
if (DisableShuffle)
// Ignore if user chose so.
@@ -128,11 +127,11 @@ bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal,
return MCS.reshuffleTo(MCB);
}
-bool
-llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
- MCSubtargetInfo const &STI, MCInst &MCB,
- SmallVector<DuplexCandidate, 8> possibleDuplexes) {
- if (DisableShuffle)
+bool llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst &MCB,
+ SmallVector<DuplexCandidate, 8> possibleDuplexes) {
+
+ if (DisableShuffle || possibleDuplexes.size() == 0)
return false;
if (!HexagonMCInstrInfo::bundleSize(MCB)) {
@@ -173,10 +172,8 @@ llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
HexagonMCShuffler MCS(Context, false, MCII, STI, MCB);
doneShuffling = MCS.reshuffleTo(MCB); // shuffle
}
- if (!doneShuffling)
- return true;
- return false;
+ return doneShuffling;
}
bool llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index 3410c0ddbd84..4fc8addb27bc 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -28,16 +28,17 @@ class MCSubtargetInfo;
// Insn bundle shuffler.
class HexagonMCShuffler : public HexagonShuffler {
public:
- HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII,
- MCSubtargetInfo const &STI, MCInst &MCB)
- : HexagonShuffler(Context, Fatal, MCII, STI) {
+ HexagonMCShuffler(MCContext &Context, bool ReportErrors,
+ MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB)
+ : HexagonShuffler(Context, ReportErrors, MCII, STI) {
init(MCB);
}
- HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII,
- MCSubtargetInfo const &STI, MCInst &MCB,
- MCInst const &AddMI, bool InsertAtFront)
- : HexagonShuffler(Context, Fatal, MCII, STI) {
+ HexagonMCShuffler(MCContext &Context, bool ReportErrors,
+ MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB, MCInst const &AddMI, bool InsertAtFront)
+ : HexagonShuffler(Context, ReportErrors, MCII, STI) {
init(MCB, AddMI, InsertAtFront);
}
@@ -52,9 +53,11 @@ private:
void init(MCInst &MCB, MCInst const &AddMI, bool InsertAtFront);
};
-// Invocation of the shuffler.
-bool HexagonMCShuffle(MCContext &Context, bool Fatal, MCInstrInfo const &MCII,
- MCSubtargetInfo const &STI, MCInst &MCB);
+// Invocation of the shuffler. Returns true if the shuffle succeeded. If
+// true, MCB will contain the newly-shuffled packet.
+bool HexagonMCShuffle(MCContext &Context, bool ReportErrors,
+ MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst &MCB);
bool HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
MCSubtargetInfo const &STI, MCInst &MCB,
MCInst const &AddMI, int fixupCount);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index dfdddb50657c..6a08d7503bac 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -517,6 +517,14 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
return nullptr;
}
+  // Add the qfloat subtarget feature by default to v68 and above
+  // unless explicitly disabled.
+ if (checkFeature(X, Hexagon::ExtensionHVXV68) &&
+ ArchFS.find("-hvx-qfloat", 0) == std::string::npos) {
+ llvm::FeatureBitset Features = X->getFeatureBits();
+ X->setFeatureBits(Features.set(Hexagon::ExtensionHVXQFloat));
+ }
+
if (HexagonDisableDuplex) {
llvm::FeatureBitset Features = X->getFeatureBits();
X->setFeatureBits(Features.reset(Hexagon::FeatureDuplex));
@@ -551,21 +559,11 @@ void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI,
}
unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
- static std::map<StringRef,unsigned> ElfFlags = {
- {"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
- {"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
- {"hexagonv60", ELF::EF_HEXAGON_MACH_V60},
- {"hexagonv62", ELF::EF_HEXAGON_MACH_V62},
- {"hexagonv65", ELF::EF_HEXAGON_MACH_V65},
- {"hexagonv66", ELF::EF_HEXAGON_MACH_V66},
- {"hexagonv67", ELF::EF_HEXAGON_MACH_V67},
- {"hexagonv67t", ELF::EF_HEXAGON_MACH_V67T},
- {"hexagonv68", ELF::EF_HEXAGON_MACH_V68},
- {"hexagonv69", ELF::EF_HEXAGON_MACH_V69},
- };
+ using llvm::Hexagon::ElfFlagsByCpuStr;
- auto F = ElfFlags.find(STI.getCPU());
- assert(F != ElfFlags.end() && "Unrecognized Architecture");
+ const std::string CPU(STI.getCPU().str());
+ auto F = ElfFlagsByCpuStr.find(CPU);
+ assert(F != ElfFlagsByCpuStr.end() && "Unrecognized Architecture");
return F->second;
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 1fce90b82864..d82731e153fe 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -167,7 +167,8 @@ static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx,
HexagonShuffler::HexagonShuffler(MCContext &Context, bool ReportErrors,
MCInstrInfo const &MCII,
MCSubtargetInfo const &STI)
- : Context(Context), MCII(MCII), STI(STI), ReportErrors(ReportErrors) {
+ : Context(Context), BundleFlags(), MCII(MCII), STI(STI),
+ ReportErrors(ReportErrors), CheckFailure() {
reset();
}
@@ -244,8 +245,8 @@ void HexagonShuffler::restrictNoSlot1Store(
"Instruction does not allow a store in slot 1"));
}
-bool HexagonShuffler::applySlotRestrictions(
- HexagonPacketSummary const &Summary) {
+bool HexagonShuffler::applySlotRestrictions(HexagonPacketSummary const &Summary,
+ const bool DoShuffle) {
// These restrictions can modify the slot masks in the instructions
// in the Packet member. They should run unconditionally and their
// order does not matter.
@@ -262,7 +263,7 @@ bool HexagonShuffler::applySlotRestrictions(
if (!CheckFailure)
restrictBranchOrder(Summary);
if (!CheckFailure)
- restrictPreferSlot3(Summary);
+ restrictPreferSlot3(Summary, DoShuffle);
return !CheckFailure;
}
@@ -303,10 +304,9 @@ void HexagonShuffler::restrictBranchOrder(HexagonPacketSummary const &Summary) {
Packet = PacketSave;
}
- reportError("invalid instruction packet: out of slots");
+ reportResourceError(Summary, "out of slots");
}
-
void HexagonShuffler::permitNonSlot() {
for (HexagonInstr &ISJ : insts()) {
const bool RequiresSlot = HexagonMCInstrInfo::requiresSlot(STI, *ISJ.ID);
@@ -319,21 +319,19 @@ bool HexagonShuffler::ValidResourceUsage(HexagonPacketSummary const &Summary) {
Optional<HexagonPacket> ShuffledPacket = tryAuction(Summary);
if (!ShuffledPacket) {
- reportError("invalid instruction packet: slot error");
+ reportResourceError(Summary, "slot error");
return false;
- } else {
- Packet = *ShuffledPacket;
}
// Verify the CVI slot subscriptions.
- llvm::stable_sort(*this, HexagonInstr::lessCVI);
+ llvm::stable_sort(*ShuffledPacket, HexagonInstr::lessCVI);
// create vector of hvx instructions to check
HVXInstsT hvxInsts;
hvxInsts.clear();
- for (const_iterator I = cbegin(); I != cend(); ++I) {
+ for (const auto &I : *ShuffledPacket) {
struct CVIUnits inst;
- inst.Units = I->CVI.getUnits();
- inst.Lanes = I->CVI.getLanes();
+ inst.Units = I.CVI.getUnits();
+ inst.Lanes = I.CVI.getLanes();
if (inst.Units == 0)
      continue; // not an HVX inst, or an HVX inst that doesn't use any pipes
hvxInsts.push_back(inst);
@@ -349,6 +347,9 @@ bool HexagonShuffler::ValidResourceUsage(HexagonPacketSummary const &Summary) {
return false;
}
}
+
+ Packet = *ShuffledPacket;
+
return true;
}
@@ -438,6 +439,15 @@ bool HexagonShuffler::restrictStoreLoadOrder(
return true;
}
+static std::string SlotMaskToText(unsigned SlotMask) {
+ SmallVector<std::string, HEXAGON_PRESHUFFLE_PACKET_SIZE> Slots;
+ for (unsigned SlotNum = 0; SlotNum < HEXAGON_PACKET_SIZE; SlotNum++)
+ if ((SlotMask & (1 << SlotNum)) != 0)
+ Slots.push_back(utostr(SlotNum));
+
+ return llvm::join(Slots, StringRef(", "));
+}
+
HexagonShuffler::HexagonPacketSummary HexagonShuffler::GetPacketSummary() {
HexagonPacketSummary Summary = HexagonPacketSummary();
@@ -454,8 +464,13 @@ HexagonShuffler::HexagonPacketSummary HexagonShuffler::GetPacketSummary() {
++Summary.pSlot3Cnt;
Summary.PrefSlot3Inst = ISJ;
}
- Summary.ReservedSlotMask |=
+ const unsigned ReservedSlots =
HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, ID);
+ Summary.ReservedSlotMask |= ReservedSlots;
+ if (ReservedSlots != 0)
+ AppliedRestrictions.push_back(std::make_pair(ID.getLoc(),
+ (Twine("Instruction has reserved slots: ") +
+ SlotMaskToText(ReservedSlots)).str()));
switch (HexagonMCInstrInfo::getType(MCII, ID)) {
case HexagonII::TypeS_2op:
@@ -463,7 +478,8 @@ HexagonShuffler::HexagonPacketSummary HexagonShuffler::GetPacketSummary() {
case HexagonII::TypeALU64:
break;
case HexagonII::TypeJ:
- Summary.branchInsts.push_back(ISJ);
+ if (HexagonMCInstrInfo::IsABranchingInst(MCII, STI, *ISJ->ID))
+ Summary.branchInsts.push_back(ISJ);
break;
case HexagonII::TypeCVI_VM_VP_LDU:
case HexagonII::TypeCVI_VM_LD:
@@ -565,14 +581,15 @@ bool HexagonShuffler::ValidPacketMemoryOps(
return !InvalidPacket;
}
-void HexagonShuffler::restrictPreferSlot3(HexagonPacketSummary const &Summary) {
+void HexagonShuffler::restrictPreferSlot3(HexagonPacketSummary const &Summary,
+ const bool DoShuffle) {
// flag if an instruction requires to be in slot 3
const bool HasOnlySlot3 = llvm::any_of(insts(), [&](HexagonInstr const &I) {
return (I.Core.getUnits() == Slot3Mask);
});
- const bool NeedsPrefSlot3Shuffle =
- (Summary.branchInsts.size() <= 1 && !HasOnlySlot3 &&
- Summary.pSlot3Cnt == 1 && Summary.PrefSlot3Inst);
+ const bool NeedsPrefSlot3Shuffle = Summary.branchInsts.size() <= 1 &&
+ !HasOnlySlot3 && Summary.pSlot3Cnt == 1 &&
+ Summary.PrefSlot3Inst && DoShuffle;
if (!NeedsPrefSlot3Shuffle)
return;
@@ -590,9 +607,9 @@ void HexagonShuffler::restrictPreferSlot3(HexagonPacketSummary const &Summary) {
}
/// Check that the packet is legal and enforce relative insn order.
-bool HexagonShuffler::check() {
+bool HexagonShuffler::check(const bool RequireShuffle) {
const HexagonPacketSummary Summary = GetPacketSummary();
- if (!applySlotRestrictions(Summary))
+ if (!applySlotRestrictions(Summary, RequireShuffle))
return false;
if (!ValidPacketMemoryOps(Summary)) {
@@ -600,13 +617,14 @@ bool HexagonShuffler::check() {
return false;
}
- ValidResourceUsage(Summary);
+ if (RequireShuffle)
+ ValidResourceUsage(Summary);
return !CheckFailure;
}
llvm::Optional<HexagonShuffler::HexagonPacket>
-HexagonShuffler::tryAuction(HexagonPacketSummary const &Summary) const {
+HexagonShuffler::tryAuction(HexagonPacketSummary const &Summary) {
HexagonPacket PacketResult = Packet;
HexagonUnitAuction AuctionCore(Summary.ReservedSlotMask);
llvm::stable_sort(PacketResult, HexagonInstr::lessCore);
@@ -635,13 +653,13 @@ bool HexagonShuffler::shuffle() {
if (size() > HEXAGON_PACKET_SIZE) {
    // Ignore a packet with more instructions than a packet can hold
// or with compound or duplex insns for now.
- reportError(Twine("invalid instruction packet"));
+ reportError("invalid instruction packet");
return false;
}
// Check and prepare packet.
- bool Ok = true;
- if (size() > 1 && (Ok = check()))
+ bool Ok = check();
+ if (size() > 1 && Ok)
// Reorder the handles for each slot.
for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE;
++nSlot) {
@@ -684,6 +702,32 @@ bool HexagonShuffler::shuffle() {
return Ok;
}
+void HexagonShuffler::reportResourceError(HexagonPacketSummary const &Summary, StringRef Err) {
+ if (ReportErrors)
+ reportResourceUsage(Summary);
+ reportError(Twine("invalid instruction packet: ") + Err);
+}
+
+void HexagonShuffler::reportResourceUsage(HexagonPacketSummary const &Summary) {
+ auto SM = Context.getSourceManager();
+ if (SM) {
+ for (HexagonInstr const &I : insts()) {
+ const unsigned Units = I.Core.getUnits();
+
+ if (HexagonMCInstrInfo::requiresSlot(STI, *I.ID)) {
+ const std::string UnitsText = Units ? SlotMaskToText(Units) : "<None>";
+ SM->PrintMessage(I.ID->getLoc(), SourceMgr::DK_Note,
+ Twine("Instruction can utilize slots: ") +
+ UnitsText);
+      } else if (!HexagonMCInstrInfo::isImmext(*I.ID))
+ SM->PrintMessage(I.ID->getLoc(), SourceMgr::DK_Note,
+ "Instruction does not require a slot");
+ }
+ }
+}
+
void HexagonShuffler::reportError(Twine const &Msg) {
CheckFailure = true;
if (ReportErrors) {
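
SlotMaskToText and reportResourceUsage above turn each instruction's slot bitmask into a readable note such as "Instruction can utilize slots: 0, 3" when a resource error is reported. A self-contained sketch of that formatting step, assuming a four-slot packet and using no LLVM types:

#include <cstdio>
#include <string>

// Assumed stand-in for HEXAGON_PACKET_SIZE.
constexpr unsigned kNumSlots = 4;

static std::string slotMaskToText(unsigned SlotMask) {
  std::string Out;
  for (unsigned Slot = 0; Slot < kNumSlots; ++Slot)
    if (SlotMask & (1u << Slot)) {
      if (!Out.empty())
        Out += ", ";
      Out += std::to_string(Slot);
    }
  return Out;
}

int main() {
  // An instruction restricted to slots 0 and 3 would be reported as "0, 3".
  std::printf("Instruction can utilize slots: %s\n",
              slotMaskToText(0b1001u).c_str());
  return 0;
}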
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 1b4ebc5111db..70992e4c7e81 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -72,16 +72,6 @@ public:
using UnitsAndLanes = std::pair<unsigned, unsigned>;
private:
- // Available HVX slots.
- enum {
- CVI_NONE = 0,
- CVI_XLANE = 1 << 0,
- CVI_SHIFT = 1 << 1,
- CVI_MPY0 = 1 << 2,
- CVI_MPY1 = 1 << 3,
- CVI_ZW = 1 << 4
- };
-
// Count of adjacent slots that the insn requires to be executed.
unsigned Lanes;
// Flag whether the insn is a load or a store.
@@ -177,21 +167,23 @@ protected:
bool ReportErrors;
bool CheckFailure;
std::vector<std::pair<SMLoc, std::string>> AppliedRestrictions;
- bool applySlotRestrictions(HexagonPacketSummary const &Summary);
+
+ bool applySlotRestrictions(HexagonPacketSummary const &Summary,
+ const bool DoShuffle);
void restrictSlot1AOK(HexagonPacketSummary const &Summary);
void restrictNoSlot1Store(HexagonPacketSummary const &Summary);
void restrictNoSlot1();
bool restrictStoreLoadOrder(HexagonPacketSummary const &Summary);
void restrictBranchOrder(HexagonPacketSummary const &Summary);
- void restrictPreferSlot3(HexagonPacketSummary const &Summary);
+ void restrictPreferSlot3(HexagonPacketSummary const &Summary,
+ const bool DoShuffle);
void permitNonSlot();
- Optional<HexagonPacket> tryAuction(HexagonPacketSummary const &Summary) const;
+ Optional<HexagonPacket> tryAuction(HexagonPacketSummary const &Summary);
HexagonPacketSummary GetPacketSummary();
bool ValidPacketMemoryOps(HexagonPacketSummary const &Summary) const;
bool ValidResourceUsage(HexagonPacketSummary const &Summary);
- bool validPacketInsts() const;
public:
using iterator = HexagonPacket::iterator;
@@ -205,7 +197,7 @@ public:
// Reset to initial state.
void reset();
// Check if the bundle may be validly shuffled.
- bool check();
+ bool check(const bool RequireShuffle = true);
// Reorder the insn handles in the bundle.
bool shuffle();
@@ -242,6 +234,8 @@ public:
// Return the error code for the last check or shuffling of the bundle.
void reportError(Twine const &Msg);
+ void reportResourceError(HexagonPacketSummary const &Summary, StringRef Err);
+ void reportResourceUsage(HexagonPacketSummary const &Summary);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index a994bd7e57a4..660215ca7435 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -141,7 +141,7 @@ struct LanaiOperand : public MCParsedAsmOperand {
struct MemOp Mem;
};
- explicit LanaiOperand(KindTy Kind) : MCParsedAsmOperand(), Kind(Kind) {}
+ explicit LanaiOperand(KindTy Kind) : Kind(Kind) {}
public:
// The functions below are used by the autogenerated ASM matcher and hence to
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 0d9e63c112fb..010ff80ad42a 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -284,7 +284,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
void LanaiTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(nullptr, 0);
+ SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
@@ -511,7 +511,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
// the sret argument into rv for the return. Save the argument into
// a virtual register so that we can access it from the return points.
if (MF.getFunction().hasStructRetAttr()) {
- unsigned Reg = LanaiMFI->getSRetReturnReg();
+ Register Reg = LanaiMFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
LanaiMFI->setSRetReturnReg(Reg);
@@ -577,7 +577,7 @@ LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (DAG.getMachineFunction().getFunction().hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
LanaiMachineFunctionInfo *LanaiMFI = MF.getInfo<LanaiMachineFunctionInfo>();
- unsigned Reg = LanaiMFI->getSRetReturnReg();
+ Register Reg = LanaiMFI->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val =
@@ -1077,7 +1077,7 @@ SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op,
// Return the link register, which contains the return address.
// Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ Register Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
diff --git a/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index 67443b771d3d..ce79bdafc425 100644
--- a/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -412,9 +412,8 @@ bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo();
bool Modified = false;
- for (MfIterator MFI = MF.begin(); MFI != MF.end(); ++MFI) {
- Modified |= combineMemAluInBasicBlock(&*MFI);
- }
+ for (MachineBasicBlock &MBB : MF)
+ Modified |= combineMemAluInBasicBlock(&MBB);
return Modified;
}
} // namespace
diff --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
index abe20c8e18cf..03cf10205173 100644
--- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -165,7 +165,7 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if ((isSPLSOpcode(MI.getOpcode()) && !isInt<10>(Offset)) ||
!isInt<16>(Offset)) {
assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(&Lanai::GPRRegClass);
+ Register Reg = RS->FindUnusedReg(&Lanai::GPRRegClass);
if (!Reg)
Reg = RS->scavengeRegister(&Lanai::GPRRegClass, II, SPAdj);
assert(Reg && "Register scavenger failed");
diff --git a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
index d9d7847a0c5a..37a4843e1bc4 100644
--- a/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
+++ b/llvm/lib/Target/Lanai/LanaiSubtarget.cpp
@@ -43,4 +43,4 @@ LanaiSubtarget::LanaiSubtarget(const Triple &TargetTriple, StringRef Cpu,
CodeGenOpt::Level /*OptLevel*/)
: LanaiGenSubtargetInfo(TargetTriple, Cpu, /*TuneCPU*/ Cpu, FeatureString),
FrameLowering(initializeSubtargetDependencies(Cpu, FeatureString)),
- InstrInfo(), TLInfo(TM, *this), TSInfo() {}
+ TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
index 7027d18126bb..d8a66bc8a0da 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
@@ -148,7 +148,7 @@ void LanaiInstPrinter::printInst(const MCInst *MI, uint64_t Address,
void LanaiInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &OS, const char *Modifier) {
- assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg())
OS << "%" << getRegisterName(Op.getReg());
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index 4db879c34ad9..dcd581875f60 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -1,4 +1,4 @@
-//===---- M68kAsmParser.cpp - Parse M68k assembly to MCInst instructions --===//
+//===-- M68kAsmParser.cpp - Parse M68k assembly to MCInst instructions ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index a08ffa787095..a565ff4e004d 100644
--- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- M68kDisassembler.cpp - Disassembler for M68k -------------*- C++ -*-===//
+//===-- M68kDisassembler.cpp - Disassembler for M68k ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
index 9cd959012e6f..b3d17184f1fe 100644
--- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
@@ -1,4 +1,4 @@
-//===-- M68kCallLowering.cpp - Call lowering -------------------*- C++ -*-===//
+//===-- M68kCallLowering.cpp - Call lowering --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
index 47cdefdba100..24212e6dd9c6 100644
--- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
@@ -1,4 +1,4 @@
-//===-- M68kCallLowering.h - Call lowering -------------------*- C++ -*-===//
+//===-- M68kCallLowering.h - Call lowering ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
index 9ac4ab9a5ba1..a627eccd110d 100644
--- a/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
@@ -1,4 +1,4 @@
-//===- M68kInstructionSelector.cpp ------------------------------*- C++ -*-===//
+//===-- M68kInstructionSelector.cpp -----------------------------*- C++ -*-===//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
index bcbe62816beb..860c0ce29326 100644
--- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
@@ -1,4 +1,4 @@
-//===-- M68kLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
+//===-- M68kLegalizerInfo.cpp -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h
index 205aa81aedcc..a10401ed1a9a 100644
--- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h
@@ -1,4 +1,4 @@
-//===- M68kLegalizerInfo --------------------------------------*- C++ -*-==//
+//===-- M68kLegalizerInfo ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
index 5c0f5dae8e37..b6ed6ab28a5d 100644
--- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
@@ -1,4 +1,4 @@
-//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===//
+//===-- M68kRegisterBankInfo.cpp --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
index 853c75df2bb3..6c0b8ca7ba5a 100644
--- a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
@@ -1,4 +1,4 @@
-//===-- M68kRegisterBankInfo.h ---------------------------------*- C++ -*-===//
+//===-- M68kRegisterBankInfo.h ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -42,4 +42,4 @@ public:
getInstrMapping(const MachineInstr &MI) const override;
};
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_GLSEL_M68KREGISTERBANKINFO_H
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td
index 942677a60e6c..2a00ec065cd4 100644
--- a/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td
@@ -1,4 +1,4 @@
-//===-- M68kRegisterBanks.td - Describe the M68k Banks -------*- tablegen -*-===//
+//===-- M68kRegisterBanks.td - Describe the M68k Banks -----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68k.h b/llvm/lib/Target/M68k/M68k.h
index cef40bee7d93..b6069d736deb 100644
--- a/llvm/lib/Target/M68k/M68k.h
+++ b/llvm/lib/Target/M68k/M68k.h
@@ -1,4 +1,4 @@
-//===- M68k.h - Top-level interface for M68k representation -*- C++ -*-===//
+//===-- M68k.h - Top-level interface for M68k representation ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -54,4 +54,4 @@ createM68kInstructionSelector(const M68kTargetMachine &, const M68kSubtarget &,
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68K_H
diff --git a/llvm/lib/Target/M68k/M68k.td b/llvm/lib/Target/M68k/M68k.td
index fde491e1b6d5..de7a6c82d110 100644
--- a/llvm/lib/Target/M68k/M68k.td
+++ b/llvm/lib/Target/M68k/M68k.td
@@ -1,4 +1,4 @@
-//===-- M68k.td - Motorola 680x0 target definitions ------*- tablegen -*-===//
+//===-- M68k.td - Motorola 680x0 target definitions --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
index 08b7153632b4..3bcce9e3ba3b 100644
--- a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
+++ b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===----- M68kAsmPrinter.cpp - M68k LLVM Assembly Printer -----*- C++ -*-===//
+//===-- M68kAsmPrinter.cpp - M68k LLVM Assembly Printer ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.h b/llvm/lib/Target/M68k/M68kAsmPrinter.h
index dff3bb876336..1a76e3bf4e27 100644
--- a/llvm/lib/Target/M68k/M68kAsmPrinter.h
+++ b/llvm/lib/Target/M68k/M68kAsmPrinter.h
@@ -1,4 +1,4 @@
-//===----- M68kAsmPrinter.h - M68k LLVM Assembly Printer -------- C++ -*--===//
+//===-- M68kAsmPrinter.h - M68k LLVM Assembly Printer -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -66,4 +66,4 @@ public:
};
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KASMPRINTER_H
diff --git a/llvm/lib/Target/M68k/M68kCallingConv.h b/llvm/lib/Target/M68k/M68kCallingConv.h
index 20ffa993897f..6823df5472df 100644
--- a/llvm/lib/Target/M68k/M68kCallingConv.h
+++ b/llvm/lib/Target/M68k/M68kCallingConv.h
@@ -1,4 +1,4 @@
-//===-- M68kCallingConv.h - M68k Custom CC Routines ---------*- C++ -*-===//
+//===-- M68kCallingConv.h - M68k Custom CC Routines -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -74,4 +74,4 @@ inline bool CC_M68k_Any_AssignToReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KCALLINGCONV_H
diff --git a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
index 4149ae92ffe9..7f0c0dd92dbb 100644
--- a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
+++ b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
@@ -1,4 +1,4 @@
-//===----- M68kCollapseMOVEMPass.cpp - Expand MOVEM pass --------*- C++ -*-===//
+//===-- M68kCollapseMOVEMPass.cpp - Expand MOVEM pass -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
index 6a4aeaab518a..acfa30f28c2b 100644
--- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
+++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
@@ -1,4 +1,4 @@
-//===--M68kExpandPseudo.cpp - Expand pseudo instructions ------*- C++ -*-===//
+//===-- M68kExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.cpp b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
index 66ea6ae38f43..643e156f9446 100644
--- a/llvm/lib/Target/M68k/M68kFrameLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
@@ -1,4 +1,4 @@
-//===-- M68kFrameLowering.cpp - M68k Frame Information ------*- C++ -*-===//
+//===-- M68kFrameLowering.cpp - M68k Frame Information ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -157,7 +157,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
MachineOperand &MO = MBBI->getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
@@ -463,7 +463,7 @@ void M68kFrameLowering::emitPrologueCalleeSavedFrameMoves(
// Calculate offsets.
for (const auto &I : CSI) {
int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
BuildCFI(MBB, MBBI, DL,
@@ -485,7 +485,7 @@ void M68kFrameLowering::emitPrologue(MachineFunction &MF,
uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
bool NeedsDwarfCFI = MMI.hasDebugInfo() || Fn.needsUnwindTableEntry();
- unsigned FramePtr = TRI->getFrameRegister(MF);
+ Register FramePtr = TRI->getFrameRegister(MF);
const unsigned MachineFramePtr = FramePtr;
unsigned BasePtr = TRI->getBaseRegister();
@@ -683,7 +683,7 @@ void M68kFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- unsigned FramePtr = TRI->getFrameRegister(MF);
+ Register FramePtr = TRI->getFrameRegister(MF);
unsigned MachineFramePtr = FramePtr;
// Get the number of bytes to allocate from the FrameInfo.
@@ -819,7 +819,7 @@ bool M68kFrameLowering::assignCalleeSavedSpillSlots(
// Since emitPrologue and emitEpilogue will handle spilling and restoring of
// the frame register, we can delete it from CSI list and not have to worry
// about avoiding it later.
- unsigned FPReg = TRI->getFrameRegister(MF);
+ Register FPReg = TRI->getFrameRegister(MF);
for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
CSI.erase(CSI.begin() + i);
@@ -842,7 +842,7 @@ bool M68kFrameLowering::spillCalleeSavedRegisters(
unsigned Mask = 0;
for (const auto &Info : CSI) {
FI = std::max(FI, Info.getFrameIdx());
- unsigned Reg = Info.getReg();
+ Register Reg = Info.getReg();
unsigned Shift = MRI.getSpillRegisterOrder(Reg);
Mask |= 1 << Shift;
}
@@ -856,7 +856,7 @@ bool M68kFrameLowering::spillCalleeSavedRegisters(
const MachineFunction &MF = *MBB.getParent();
const MachineRegisterInfo &RI = MF.getRegInfo();
for (const auto &Info : CSI) {
- unsigned Reg = Info.getReg();
+ Register Reg = Info.getReg();
bool IsLiveIn = RI.isLiveIn(Reg);
if (!IsLiveIn)
MBB.addLiveIn(Reg);
@@ -877,7 +877,7 @@ bool M68kFrameLowering::restoreCalleeSavedRegisters(
unsigned Mask = 0;
for (const auto &Info : CSI) {
FI = std::max(FI, Info.getFrameIdx());
- unsigned Reg = Info.getReg();
+ Register Reg = Info.getReg();
unsigned Shift = MRI.getSpillRegisterOrder(Reg);
Mask |= 1 << Shift;
}
diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.h b/llvm/lib/Target/M68k/M68kFrameLowering.h
index 0eba9e08d858..a5349377232e 100644
--- a/llvm/lib/Target/M68k/M68kFrameLowering.h
+++ b/llvm/lib/Target/M68k/M68kFrameLowering.h
@@ -1,4 +1,4 @@
-//===- M68kFrameLowering.h - Define frame lowering for M68k -*- C++ -*-===//
+//===-- M68kFrameLowering.h - Define frame lowering for M68k ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -169,4 +169,4 @@ public:
};
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KFRAMELOWERING_H
diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
index 0076c2647df3..9ef97b96ea9a 100644
--- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
+++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
@@ -1,4 +1,4 @@
-//===- M68kISelDAGToDAG.cpp - M68k Dag to Dag Inst Selector -*- C++ -*-===//
+//===-- M68kISelDAGToDAG.cpp - M68k Dag to Dag Inst Selector ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 79b395f8f984..dba190a2ebc0 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- M68kISelLowering.cpp - M68k DAG Lowering Impl ------*- C++ -*--===//
+//===-- M68kISelLowering.cpp - M68k DAG Lowering Impl -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -268,7 +268,7 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
- unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!Register::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
@@ -900,7 +900,7 @@ SDValue M68kTargetLowering::LowerFormalArguments(
else
llvm_unreachable("Unknown argument type!");
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
@@ -1276,7 +1276,7 @@ bool M68kTargetLowering::IsEligibleForTailCallOptimization(
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
- unsigned Reg = VA.getLocReg();
+ Register Reg = VA.getLocReg();
switch (Reg) {
default:
break;
@@ -1409,32 +1409,32 @@ SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Arith, SetCC);
}
-/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
-/// according to equal/not-equal condition code \p CC.
+/// Create a BTST (Bit Test) node - Test bit \p BitNo in \p Src and set
+/// condition according to equal/not-equal condition code \p CC.
static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
const SDLoc &DL, SelectionDAG &DAG) {
- // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
+ // If Src is i8, promote it to i32 with any_extend. There is no i8 BTST
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok.
if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
// If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
+ // BTST ignores high bits (like shifts) we can use anyextend.
if (Src.getValueType() != BitNo.getValueType())
BitNo = DAG.getNode(ISD::ANY_EXTEND, DL, Src.getValueType(), BitNo);
- SDValue BT = DAG.getNode(M68kISD::BT, DL, MVT::i32, Src, BitNo);
+ SDValue BTST = DAG.getNode(M68kISD::BTST, DL, MVT::i32, Src, BitNo);
// NOTE BTST sets CCR.Z flag
M68k::CondCode Cond = CC == ISD::SETEQ ? M68k::COND_NE : M68k::COND_EQ;
return DAG.getNode(M68kISD::SETCC, DL, MVT::i8,
- DAG.getConstant(Cond, DL, MVT::i8), BT);
+ DAG.getConstant(Cond, DL, MVT::i8), BTST);
}
-/// Result of 'and' is compared against zero. Change to a BT node if possible.
-static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &DL,
- SelectionDAG &DAG) {
+/// Result of 'and' is compared against zero. Change to a BTST node if possible.
+static SDValue LowerAndToBTST(SDValue And, ISD::CondCode CC, const SDLoc &DL,
+ SelectionDAG &DAG) {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
@@ -1468,7 +1468,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &DL,
RHS = AndLHS.getOperand(1);
}
- // Use BT if the immediate can't be encoded in a TEST instruction.
+ // Use BTST if the immediate can't be encoded in a TEST instruction.
if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
LHS = AndLHS;
RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), DL, LHS.getValueType());
@@ -1592,8 +1592,8 @@ static unsigned TranslateM68kCC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
}
// Convert (truncate (srl X, N) to i1) to (bt X, N)
-static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC, const SDLoc &DL,
- SelectionDAG &DAG) {
+static SDValue LowerTruncateToBTST(SDValue Op, ISD::CondCode CC,
+ const SDLoc &DL, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 &&
"Expected TRUNCATE to i1 node");
@@ -1889,14 +1889,14 @@ SDValue M68kTargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned M68kCC,
}
/// Result of 'and' or 'trunc to i1' is compared against zero.
-/// Change to a BT node if possible.
-SDValue M68kTargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC,
- const SDLoc &DL,
- SelectionDAG &DAG) const {
+/// Change to a BTST node if possible.
+SDValue M68kTargetLowering::LowerToBTST(SDValue Op, ISD::CondCode CC,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
if (Op.getOpcode() == ISD::AND)
- return LowerAndToBT(Op, CC, DL, DAG);
+ return LowerAndToBTST(Op, CC, DL, DAG);
if (Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1)
- return LowerTruncateToBT(Op, CC, DL, DAG);
+ return LowerTruncateToBTST(Op, CC, DL, DAG);
return SDValue();
}
@@ -1909,14 +1909,14 @@ SDValue M68kTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- // Optimize to BT if possible.
- // Lower (X & (1 << N)) == 0 to BT(X, N).
- // Lower ((X >>u N) & 1) != 0 to BT(X, N).
- // Lower ((X >>s N) & 1) != 0 to BT(X, N).
- // Lower (trunc (X >> N) to i1) to BT(X, N).
+ // Optimize to BTST if possible.
+ // Lower (X & (1 << N)) == 0 to BTST(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BTST(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BTST(X, N).
+ // Lower (trunc (X >> N) to i1) to BTST(X, N).
if (Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (SDValue NewSetCC = LowerToBT(Op0, CC, DL, DAG)) {
+ if (SDValue NewSetCC = LowerToBTST(Op0, CC, DL, DAG)) {
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, NewSetCC);
return NewSetCC;
@@ -2099,7 +2099,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool IllegalFPCMov = false;
- if ((isM68kLogicalCmp(Cmp) && !IllegalFPCMov) || Opc == M68kISD::BT) {
+ if ((isM68kLogicalCmp(Cmp) && !IllegalFPCMov) || Opc == M68kISD::BTST) {
Cond = Cmp;
addTest = false;
}
@@ -2163,7 +2163,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
+ if (SDValue NewSetCC = LowerToBTST(Cond, ISD::SETNE, DL, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
@@ -2282,7 +2282,7 @@ SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- if (isM68kLogicalCmp(Cmp) || Opc == M68kISD::BT) {
+ if (isM68kLogicalCmp(Cmp) || Opc == M68kISD::BTST) {
Cond = Cmp;
AddTest = false;
} else {
@@ -2427,7 +2427,7 @@ SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result is compared against zero. Try to match it to BT.
if (Cond.hasOneUse()) {
- if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
+ if (SDValue NewSetCC = LowerToBTST(Cond, ISD::SETNE, DL, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
AddTest = false;
@@ -3101,9 +3101,9 @@ M68kTargetLowering::EmitLoweredSelect(MachineInstr &MI,
// destination registers, and the registers that went into the PHI.
for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
- unsigned DestReg = MIIt->getOperand(0).getReg();
- unsigned Op1Reg = MIIt->getOperand(1).getReg();
- unsigned Op2Reg = MIIt->getOperand(2).getReg();
+ Register DestReg = MIIt->getOperand(0).getReg();
+ Register Op1Reg = MIIt->getOperand(1).getReg();
+ Register Op2Reg = MIIt->getOperand(2).getReg();
// If this CMOV we are generating is the opposite condition from
// the jump we generated, then we have to swap the operands for the
@@ -3211,13 +3211,13 @@ SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
auto &MRI = MF.getRegInfo();
auto SPTy = getPointerTy(DAG.getDataLayout());
auto *ARClass = getRegClassFor(SPTy);
- unsigned Vreg = MRI.createVirtualRegister(ARClass);
+ Register Vreg = MRI.createVirtualRegister(ARClass);
Chain = DAG.getCopyToReg(Chain, DL, Vreg, Size);
Result = DAG.getNode(M68kISD::SEG_ALLOCA, DL, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
} else {
auto &TLI = DAG.getTargetLoweringInfo();
- unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
@@ -3391,8 +3391,8 @@ const char *M68kTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "M68kISD::AND";
case M68kISD::CMP:
return "M68kISD::CMP";
- case M68kISD::BT:
- return "M68kISD::BT";
+ case M68kISD::BTST:
+ return "M68kISD::BTST";
case M68kISD::SELECT:
return "M68kISD::SELECT";
case M68kISD::CMOV:
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h
index 6a5a40a8815b..9375a99962eb 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.h
+++ b/llvm/lib/Target/M68k/M68kISelLowering.h
@@ -1,4 +1,4 @@
-//===-- M68kISelLowering.h - M68k DAG Lowering Interface ----*- C++ -*-===//
+//===-- M68kISelLowering.h - M68k DAG Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -43,7 +43,7 @@ enum NodeType {
CMP,
/// M68k bit-test instructions.
- BT,
+ BTST,
/// M68k Select
SELECT,
@@ -204,8 +204,8 @@ private:
const CCValAssign &VA, ISD::ArgFlagsTy Flags) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &DL,
- SelectionDAG &DAG) const;
+ SDValue LowerToBTST(SDValue And, ISD::CondCode CC, const SDLoc &DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -276,4 +276,4 @@ private:
};
} // namespace llvm
-#endif // M68kISELLOWERING_H
+#endif // LLVM_LIB_TARGET_M68K_M68KISELLOWERING_H
diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index b2c05365d30b..ef50de576641 100644
--- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -150,8 +150,7 @@ let mayLoad = 1, mayStore = 1 in {
// FIXME MxBiArOp_FMR/FMI cannot consume CCR from MxAdd/MxSub which leads for
// MxAdd to survive the match and subsequent mismatch.
-class MxBiArOp_FMR<string MN, SDNode NODE, MxType TYPE,
- MxOperand MEMOpd, ComplexPattern MEMPat,
+class MxBiArOp_FMR<string MN, MxType TYPE, MxOperand MEMOpd,
bits<4> CMD, MxEncEA EA, MxEncExt EXT>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$opd),
MN#"."#TYPE.Prefix#"\t$opd, $dst",
@@ -160,8 +159,7 @@ class MxBiArOp_FMR<string MN, SDNode NODE, MxType TYPE,
!cast<MxEncOpMode>("MxOpMode"#TYPE.Size#"EA"#TYPE.RLet),
MxBeadDReg<1>, EA, EXT>>;
-class MxBiArOp_FMI<string MN, SDNode NODE, MxType TYPE,
- MxOperand MEMOpd, ComplexPattern MEMPat,
+class MxBiArOp_FMI<string MN, MxType TYPE, MxOperand MEMOpd,
bits<4> CMD, MxEncEA MEMEA, MxEncExt MEMExt>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$opd),
MN#"."#TYPE.Prefix#"\t$opd, $dst",
@@ -218,47 +216,47 @@ multiclass MxBiArOp_DF<string MN, SDNode NODE, bit isComm,
def NAME#"32di" : MxBiArOp_RFRI_xEA<MN, NODE, MxType32d, CMD>;
// op $reg, $mem
- def NAME#"8pd" : MxBiArOp_FMR<MN, NODE, MxType8d, MxType8.POp, MxType8.PPat,
+ def NAME#"8pd" : MxBiArOp_FMR<MN, MxType8d, MxType8.POp,
CMD, MxEncEAp_0, MxExtI16_0>;
- def NAME#"16pd" : MxBiArOp_FMR<MN, NODE, MxType16d, MxType16.POp, MxType16.PPat,
+ def NAME#"16pd" : MxBiArOp_FMR<MN, MxType16d, MxType16.POp,
CMD, MxEncEAp_0, MxExtI16_0>;
- def NAME#"32pd" : MxBiArOp_FMR<MN, NODE, MxType32d, MxType32.POp, MxType32.PPat,
+ def NAME#"32pd" : MxBiArOp_FMR<MN, MxType32d, MxType32.POp,
CMD, MxEncEAp_0, MxExtI16_0>;
- def NAME#"8fd" : MxBiArOp_FMR<MN, NODE, MxType8d, MxType8.FOp, MxType8.FPat,
+ def NAME#"8fd" : MxBiArOp_FMR<MN, MxType8d, MxType8.FOp,
CMD, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"16fd" : MxBiArOp_FMR<MN, NODE, MxType16d, MxType16.FOp, MxType16.FPat,
+ def NAME#"16fd" : MxBiArOp_FMR<MN, MxType16d, MxType16.FOp,
CMD, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"32fd" : MxBiArOp_FMR<MN, NODE, MxType32d, MxType32.FOp, MxType32.FPat,
+ def NAME#"32fd" : MxBiArOp_FMR<MN, MxType32d, MxType32.FOp,
CMD, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"8jd" : MxBiArOp_FMR<MN, NODE, MxType8d, MxType8.JOp, MxType8.JPat,
+ def NAME#"8jd" : MxBiArOp_FMR<MN, MxType8d, MxType8.JOp,
CMD, MxEncEAj_0, MxExtEmpty>;
- def NAME#"16jd" : MxBiArOp_FMR<MN, NODE, MxType16d, MxType16.JOp, MxType16.JPat,
+ def NAME#"16jd" : MxBiArOp_FMR<MN, MxType16d, MxType16.JOp,
CMD, MxEncEAj_0, MxExtEmpty>;
- def NAME#"32jd" : MxBiArOp_FMR<MN, NODE, MxType32d, MxType32.JOp, MxType32.JPat,
+ def NAME#"32jd" : MxBiArOp_FMR<MN, MxType32d, MxType32.JOp,
CMD, MxEncEAj_0, MxExtEmpty>;
// op $imm, $mem
- def NAME#"8pi" : MxBiArOp_FMI<MN, NODE, MxType8, MxType8.POp, MxType8.PPat,
+ def NAME#"8pi" : MxBiArOp_FMI<MN, MxType8, MxType8.POp,
CMDI, MxEncEAp_0, MxExtI16_0>;
- def NAME#"16pi" : MxBiArOp_FMI<MN, NODE, MxType16, MxType16.POp, MxType16.PPat,
+ def NAME#"16pi" : MxBiArOp_FMI<MN, MxType16, MxType16.POp,
CMDI, MxEncEAp_0, MxExtI16_0>;
- def NAME#"32pi" : MxBiArOp_FMI<MN, NODE, MxType32, MxType32.POp, MxType32.PPat,
+ def NAME#"32pi" : MxBiArOp_FMI<MN, MxType32, MxType32.POp,
CMDI, MxEncEAp_0, MxExtI16_0>;
- def NAME#"8fi" : MxBiArOp_FMI<MN, NODE, MxType8, MxType8.FOp, MxType8.FPat,
+ def NAME#"8fi" : MxBiArOp_FMI<MN, MxType8, MxType8.FOp,
CMDI, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"16fi" : MxBiArOp_FMI<MN, NODE, MxType16, MxType16.FOp, MxType16.FPat,
+ def NAME#"16fi" : MxBiArOp_FMI<MN, MxType16, MxType16.FOp,
CMDI, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"32fi" : MxBiArOp_FMI<MN, NODE, MxType32, MxType32.FOp, MxType32.FPat,
+ def NAME#"32fi" : MxBiArOp_FMI<MN, MxType32, MxType32.FOp,
CMDI, MxEncEAf_0, MxExtBrief_0>;
- def NAME#"8ji" : MxBiArOp_FMI<MN, NODE, MxType8, MxType8.JOp, MxType8.JPat,
+ def NAME#"8ji" : MxBiArOp_FMI<MN, MxType8, MxType8.JOp,
CMDI, MxEncEAj_0, MxExtEmpty>;
- def NAME#"16ji" : MxBiArOp_FMI<MN, NODE, MxType16, MxType16.JOp, MxType16.JPat,
+ def NAME#"16ji" : MxBiArOp_FMI<MN, MxType16, MxType16.JOp,
CMDI, MxEncEAj_0, MxExtEmpty>;
- def NAME#"32ji" : MxBiArOp_FMI<MN, NODE, MxType32, MxType32.JOp, MxType32.JPat,
+ def NAME#"32ji" : MxBiArOp_FMI<MN, MxType32, MxType32.JOp,
CMDI, MxEncEAj_0, MxExtEmpty>;
def NAME#"16dr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, MxType16r,
@@ -284,8 +282,7 @@ multiclass MxBiArOp_DF<string MN, SDNode NODE, bit isComm,
// operations do not produce CCR we should not match them against Mx nodes that
// produce it.
let Pattern = [(null_frag)] in
-multiclass MxBiArOp_AF<string MN, SDNode NODE, bit isComm,
- bits<4> CMD, bits<4> CMDI> {
+multiclass MxBiArOp_AF<string MN, SDNode NODE, bits<4> CMD> {
def NAME#"32ak" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.KOp, MxType32.KPat,
CMD, MxEncEAk, MxExtBrief_2>;
@@ -307,9 +304,9 @@ multiclass MxBiArOp_AF<string MN, SDNode NODE, bit isComm,
// NOTE These naturally produce CCR
defm ADD : MxBiArOp_DF<"add", MxAdd, 1, 0xD, 0x6>;
-defm ADD : MxBiArOp_AF<"adda", MxAdd, 1, 0xD, 0x6>;
+defm ADD : MxBiArOp_AF<"adda", MxAdd, 0xD>;
defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>;
-defm SUB : MxBiArOp_AF<"suba", MxSub, 0, 0x9, 0x4>;
+defm SUB : MxBiArOp_AF<"suba", MxSub, 0x9>;
let Uses = [CCR], Defs = [CCR] in {
diff --git a/llvm/lib/Target/M68k/M68kInstrBits.td b/llvm/lib/Target/M68k/M68kInstrBits.td
index d97ca50f74a9..d610bce5c277 100644
--- a/llvm/lib/Target/M68k/M68kInstrBits.td
+++ b/llvm/lib/Target/M68k/M68kInstrBits.td
@@ -1,4 +1,4 @@
-//===------- M68kInstrBits.td - Bit Manipulation Instrs --*- tablegen -*-===//
+//===-- M68kInstrBits.td - Bit Manipulation Instrs ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,7 +12,7 @@
///
/// Machine:
///
-/// BCNG [ ] BCLR [ ] BSET [ ] BTST [~]
+/// BCHG [ ] BCLR [ ] BSET [ ] BTST [~]
///
/// Map:
///
@@ -51,24 +51,24 @@ class MxBTSTEnc_I<MxBead8Imm IMM, MxEncEA EA, MxEncExt EXT>
let Defs = [CCR] in {
class MxBTST_RR<MxType TYPE>
: MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBt TYPE.VT:$dst, TYPE.VT:$bitno))],
+ [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.VT:$bitno))],
MxBTSTEnc_R<MxBeadDReg<1>, MxEncEAd_0, MxExtEmpty>>;
class MxBTST_RI<MxType TYPE>
: MxInst<(outs), (ins TYPE.ROp:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBt TYPE.VT:$dst, TYPE.IPat:$bitno))],
+ [(set CCR, (MxBtst TYPE.VT:$dst, TYPE.IPat:$bitno))],
MxBTSTEnc_I<MxBead8Imm<1>, MxEncEAd_0, MxExtEmpty>>;
class MxBTST_MR<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
MxEncEA EA, MxEncExt EXT>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.ROp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBt (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno))],
+ [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.VT:$bitno))],
MxBTSTEnc_R<MxBeadDReg<1>, EA, EXT>>;
class MxBTST_MI<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
MxEncEA EA, MxEncExt EXT>
: MxInst<(outs), (ins MEMOpd:$dst, TYPE.IOp:$bitno), "btst\t$bitno, $dst",
- [(set CCR, (MxBt (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno))],
+ [(set CCR, (MxBtst (TYPE.Load MEMPat:$dst), TYPE.IPat:$bitno))],
MxBTSTEnc_I<MxBead8Imm<1>, EA, EXT>>;
} // Defs = [CCR]
diff --git a/llvm/lib/Target/M68k/M68kInstrBuilder.h b/llvm/lib/Target/M68k/M68kInstrBuilder.h
index e32b1b047a2b..e85bd270287c 100644
--- a/llvm/lib/Target/M68k/M68kInstrBuilder.h
+++ b/llvm/lib/Target/M68k/M68kInstrBuilder.h
@@ -1,4 +1,4 @@
-//===-- M68kInstrBuilder.h - Functions to build M68k insts --*- C++ -*-===//
+//===-- M68kInstrBuilder.h - Functions to build M68k insts ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -91,4 +91,4 @@ addMemOperand(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
} // end namespace M68k
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_M6800_M6800INSTRBUILDER_H
+#endif // LLVM_LIB_TARGET_M68K_M68KINSTRBUILDER_H
diff --git a/llvm/lib/Target/M68k/M68kInstrCompiler.td b/llvm/lib/Target/M68k/M68kInstrCompiler.td
index 8fb331dec0e9..2ecf5ca0e6d0 100644
--- a/llvm/lib/Target/M68k/M68kInstrCompiler.td
+++ b/llvm/lib/Target/M68k/M68kInstrCompiler.td
@@ -1,4 +1,4 @@
-//===-- M68kInstrCompiler.td - Pseudos and Patterns ------*- tablegen -*-===//
+//===-- M68kInstrCompiler.td - Pseudos and Patterns --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td b/llvm/lib/Target/M68k/M68kInstrControl.td
index 9f87833ab0e2..be9045b6e0d2 100644
--- a/llvm/lib/Target/M68k/M68kInstrControl.td
+++ b/llvm/lib/Target/M68k/M68kInstrControl.td
@@ -1,4 +1,4 @@
-//===-- M68kInstrControl.td - Control Flow Instructions --*- tablegen -*-===//
+//===-- M68kInstrControl.td - Control Flow Instructions ----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td
index 40b9e4a2a7fa..3dd5d9f8c7ac 100644
--- a/llvm/lib/Target/M68k/M68kInstrData.td
+++ b/llvm/lib/Target/M68k/M68kInstrData.td
@@ -1,4 +1,4 @@
-//== M68kInstrData.td - M68k Data Movement Instructions -*- tablegen --===//
+//===-- M68kInstrData.td - M68k Data Movement Instructions -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td
index 99b7ffd17971..7e0c96a5b1f6 100644
--- a/llvm/lib/Target/M68k/M68kInstrFormats.td
+++ b/llvm/lib/Target/M68k/M68kInstrFormats.td
@@ -1,4 +1,4 @@
-//=== M68kInstrFormats.td - M68k Instruction Formats ---*- tablegen -*-===//
+//===-- M68kInstrFormats.td - M68k Instruction Formats -----*- tablegen -*-===//
// The LLVM Compiler Infrastructure
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 639bcd455687..105c816f9885 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- M68kInstrInfo.cpp - M68k Instruction Information ----*- C++ -*-===//
+//===-- M68kInstrInfo.cpp - M68k Instruction Information --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -348,8 +348,8 @@ void M68kInstrInfo::AddZExt(MachineBasicBlock &MBB,
bool M68kInstrInfo::ExpandMOVX_RR(MachineInstrBuilder &MIB, MVT MVTDst,
MVT MVTSrc) const {
unsigned Move = MVTDst == MVT::i16 ? M68k::MOV16rr : M68k::MOV32rr;
- unsigned Dst = MIB->getOperand(0).getReg();
- unsigned Src = MIB->getOperand(1).getReg();
+ Register Dst = MIB->getOperand(0).getReg();
+ Register Src = MIB->getOperand(1).getReg();
assert(Dst != Src && "You cannot use the same Regs with MOVX_RR");
@@ -394,8 +394,8 @@ bool M68kInstrInfo::ExpandMOVSZX_RR(MachineInstrBuilder &MIB, bool IsSigned,
else // i32
Move = M68k::MOV32rr;
- unsigned Dst = MIB->getOperand(0).getReg();
- unsigned Src = MIB->getOperand(1).getReg();
+ Register Dst = MIB->getOperand(0).getReg();
+ Register Src = MIB->getOperand(1).getReg();
assert(Dst != Src && "You cannot use the same Regs with MOVSX_RR");
@@ -437,7 +437,7 @@ bool M68kInstrInfo::ExpandMOVSZX_RM(MachineInstrBuilder &MIB, bool IsSigned,
MVT MVTSrc) const {
LLVM_DEBUG(dbgs() << "Expand " << *MIB.getInstr() << " to LOAD and ");
- unsigned Dst = MIB->getOperand(0).getReg();
+ Register Dst = MIB->getOperand(0).getReg();
// We need the subreg of Dst to make instruction verifier happy because the
// real machine instruction consumes and produces values of the same size and
@@ -559,7 +559,7 @@ bool M68kInstrInfo::ExpandMOVEM(MachineInstrBuilder &MIB,
static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- unsigned Reg = MIB->getOperand(0).getReg();
+ Register Reg = MIB->getOperand(0).getReg();
MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h
index 6aced1487365..84d50c181ead 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.h
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- M68kInstrInfo.h - M68k Instruction Information ------*- C++ -*-===//
+//===-- M68kInstrInfo.h - M68k Instruction Information ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -336,4 +336,4 @@ public:
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KINSTRINFO_H
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td
index ed6cd9ecf442..c581dd91eaaa 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -1,4 +1,4 @@
-//== M68kInstrInfo.td - Main M68k Instruction Definition -*- tablegen -*-=//
+//===-- M68kInstrInfo.td - Main M68k Instruction Definition -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -137,7 +137,7 @@ def MxSMul : SDNode<"M68kISD::SMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>;
def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_2BiArithCCROut, [SDNPCommutative]>;
def MxCmp : SDNode<"M68kISD::CMP", MxSDT_CmpTest>;
-def MxBt : SDNode<"M68kISD::BT", MxSDT_CmpTest>;
+def MxBtst : SDNode<"M68kISD::BTST", MxSDT_CmpTest>;
def MxCmov : SDNode<"M68kISD::CMOV", MxSDT_Cmov>;
def MxBrCond : SDNode<"M68kISD::BRCOND", MxSDT_BrCond, [SDNPHasChain]>;
@@ -587,8 +587,8 @@ class MxType<ValueType vt, string prefix, string postfix,
// qOp: Supported PCD operand
// qPat: What PCD pattern is used
MxOperand qOp, ComplexPattern qPat,
- // kOp: Supported PCD operand
- // kPat: What PCD pattern is used
+ // kOp: Supported PCI operand
+ // kPat: What PCI pattern is used
MxOperand kOp, ComplexPattern kPat,
// iOp: Supported immediate operand
// iPat: What immediate pattern is used
diff --git a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
index cab687638076..f1967ec11928 100644
--- a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
+++ b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
@@ -1,4 +1,4 @@
-//===------ M68kInstrShiftRotate.td - Logical Instrs -----*- tablegen -*-===//
+//===-- M68kInstrShiftRotate.td - Logical Instrs -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kMCInstLower.cpp b/llvm/lib/Target/M68k/M68kMCInstLower.cpp
index f14361559b13..a0b1452ee663 100644
--- a/llvm/lib/Target/M68k/M68kMCInstLower.cpp
+++ b/llvm/lib/Target/M68k/M68kMCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- M68kMCInstLower.cpp - M68k MachineInstr to MCInst ---*- C++ -*-===//
+//===-- M68kMCInstLower.cpp - M68k MachineInstr to MCInst -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kMCInstLower.h b/llvm/lib/Target/M68k/M68kMCInstLower.h
index d6160629545e..76d9a36f70ef 100644
--- a/llvm/lib/Target/M68k/M68kMCInstLower.h
+++ b/llvm/lib/Target/M68k/M68kMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- M68kMCInstLower.h - Lower MachineInstr to MCInst -----*- C++ -*--===//
+//===-- M68kMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -51,4 +51,4 @@ public:
};
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KMCINSTLOWER_H
diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.cpp b/llvm/lib/Target/M68k/M68kMachineFunction.cpp
index 3d048df7ba49..b1e7369116d7 100644
--- a/llvm/lib/Target/M68k/M68kMachineFunction.cpp
+++ b/llvm/lib/Target/M68k/M68kMachineFunction.cpp
@@ -1,4 +1,4 @@
-//===-- M68kMachineFunctionInfo.cpp - M68k private data ----*- C++ -*--===//
+//===-- M68kMachineFunctionInfo.cpp - M68k private data ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.h b/llvm/lib/Target/M68k/M68kMachineFunction.h
index 5760bdd4b9e3..93c5255199d4 100644
--- a/llvm/lib/Target/M68k/M68kMachineFunction.h
+++ b/llvm/lib/Target/M68k/M68kMachineFunction.h
@@ -1,4 +1,4 @@
-//===-- M68kMachineFunctionInfo.h - M68k private data ---------*- C++ -*-=//
+//===-- M68kMachineFunctionInfo.h - M68k private data -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -111,4 +111,4 @@ private:
} // end of namespace llvm
-#endif // M68K_MACHINE_FUNCTION_INFO_H
+#endif // LLVM_LIB_TARGET_M68K_M68KMACHINEFUNCTION_H
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
index 69d16035b1d9..0cae7ac4e312 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- M68kRegisterInfo.cpp - CPU0 Register Information -----*- C++ -*--===//
+//===-- M68kRegisterInfo.cpp - CPU0 Register Information --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.h b/llvm/lib/Target/M68k/M68kRegisterInfo.h
index 51b94294772c..7f822e1cb34f 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.h
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- M68kRegisterInfo.h - M68k Register Information Impl --*- C++ --===//
+//===-- M68kRegisterInfo.h - M68k Register Information Impl -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -106,4 +106,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KREGISTERINFO_H
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.td b/llvm/lib/Target/M68k/M68kRegisterInfo.td
index e2ea2967f75b..49874a2b1099 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.td
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.td
@@ -1,4 +1,4 @@
-//== M68kRegisterInfo.td - M68k register definitions ----*- tablegen -*-==//
+//==-- M68kRegisterInfo.td - M68k register definitions ------*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kSchedule.td b/llvm/lib/Target/M68k/M68kSchedule.td
index a94cd8f31e2e..6a1bf0c6a020 100644
--- a/llvm/lib/Target/M68k/M68kSchedule.td
+++ b/llvm/lib/Target/M68k/M68kSchedule.td
@@ -1,4 +1,4 @@
-//===-- M68kSchedule.td - M68k Scheduling Definitions --*- tablegen -*-===//
+//===-- M68kSchedule.td - M68k Scheduling Definitions ------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kSubtarget.cpp b/llvm/lib/Target/M68k/M68kSubtarget.cpp
index 991889706e67..ec3830243daf 100644
--- a/llvm/lib/Target/M68k/M68kSubtarget.cpp
+++ b/llvm/lib/Target/M68k/M68kSubtarget.cpp
@@ -1,4 +1,4 @@
-//===-- M68kSubtarget.cpp - M68k Subtarget Information ------*- C++ -*-===//
+//===-- M68kSubtarget.cpp - M68k Subtarget Information ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kSubtarget.h b/llvm/lib/Target/M68k/M68kSubtarget.h
index f45cb7edca1f..9bf2984983a1 100644
--- a/llvm/lib/Target/M68k/M68kSubtarget.h
+++ b/llvm/lib/Target/M68k/M68kSubtarget.h
@@ -1,4 +1,4 @@
-//===-- M68kSubtarget.h - Define Subtarget for the M68k -----*- C++ -*-===//
+//===-- M68kSubtarget.h - Define Subtarget for the M68k ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,8 +11,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_CPU0_M68KSUBTARGET_H
-#define LLVM_LIB_TARGET_CPU0_M68KSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_M68K_M68KSUBTARGET_H
+#define LLVM_LIB_TARGET_M68K_M68KSUBTARGET_H
#include "M68kFrameLowering.h"
#include "M68kISelLowering.h"
@@ -179,4 +179,4 @@ public:
};
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KSUBTARGET_H
diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index e8126c6219e8..fd21fe6bcea8 100644
--- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -1,4 +1,4 @@
-//===-- M68kTargetMachine.cpp - M68k target machine ---------*- C++ -*-===//
+//===-- M68kTargetMachine.cpp - M68k Target Machine -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.h b/llvm/lib/Target/M68k/M68kTargetMachine.h
index 34fae8e45504..8dda720774e7 100644
--- a/llvm/lib/Target/M68k/M68kTargetMachine.h
+++ b/llvm/lib/Target/M68k/M68kTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- M68kTargetMachine.h - Define TargetMachine for M68k ----- C++ -===//
+//===-- M68kTargetMachine.h - Define TargetMachine for M68k -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -53,4 +53,4 @@ public:
};
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KTARGETMACHINE_H
diff --git a/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp b/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp
index 3e26b37e7760..4986d5dbebb9 100644
--- a/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp
+++ b/llvm/lib/Target/M68k/M68kTargetObjectFile.cpp
@@ -1,4 +1,4 @@
-//===-- M68kELFTargetObjectFile.cpp - M68k Object Files -----*- C++ -*-===//
+//===-- M68kELFTargetObjectFile.cpp - M68k Object Files ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/M68kTargetObjectFile.h b/llvm/lib/Target/M68k/M68kTargetObjectFile.h
index dbc5375d5423..80a7d0d6e120 100644
--- a/llvm/lib/Target/M68k/M68kTargetObjectFile.h
+++ b/llvm/lib/Target/M68k/M68kTargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- M68kELFTargetObjectFile.h - M68k Object Info ---------*- C++ -====//
+//===-- M68kELFTargetObjectFile.h - M68k Object Info ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -28,4 +28,4 @@ public:
};
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_M68KTARGETOBJECTFILE_H
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index c1f88fb78ee1..b66557ec6c3a 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -1,4 +1,4 @@
-//===-- M68kAsmBackend.cpp - M68k Assembler Backend ---------*- C++ -*-===//
+//===-- M68kAsmBackend.cpp - M68k Assembler Backend -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
index 7c56cfdf3123..4883f647e214 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- M68kBaseInfo.h - Top level definitions for M68k MC --*- C++ -*-----===//
+//===-- M68kBaseInfo.h - Top level definitions for M68k MC ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -244,4 +244,4 @@ static inline unsigned getMaskedSpillRegister(unsigned order) {
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_MCTARGETDESC_M68KBASEINFO_H
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
index 4c9a3297424d..27f1b3a3fac8 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===---------- M68kELFObjectWriter.cpp - M68k ELF Writer ---*- C++ -*-===//
+//===-- M68kELFObjectWriter.cpp - M68k ELF Writer ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h
index 2b760dec9e41..54a0e98fea6e 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h
@@ -1,4 +1,4 @@
-//===-- M68kFixupKinds.h - M68k Specific Fixup Entries ------*- C++ -*-===//
+//===-- M68kFixupKinds.h - M68k Specific Fixup Entries ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -51,4 +51,4 @@ static inline MCFixupKind getFixupForSize(unsigned Size, bool isPCRel) {
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68k_MCTARGETDESC_M68kFIXUPKINDS_H
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
index a2e41437ee21..9ba28622b5b5 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- M68kInstPrinter.cpp - Convert M68k MCInst to asm ----*- C++ -*-===//
+//===-- M68kInstPrinter.cpp - Convert M68k MCInst to asm --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
index ec26bc4ddbfd..239268dd7159 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- M68kInstPrinter.h - Convert M68k MCInst to asm ------*- C++ -*-===//
+//===-- M68kInstPrinter.h - Convert M68k MCInst to asm ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -166,4 +166,4 @@ private:
};
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_INSTPRINTER_M68KINSTPRINTER_H
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp
index ee2041012bb9..005d2d38f53d 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- M68kMCAsmInfo.cpp - M68k Asm Properties -------------*- C++ -*-===//
+//===-- M68kMCAsmInfo.cpp - M68k Asm Properties -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h
index b3a58cc61223..873264d88674 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- M68kMCAsmInfo.h - M68k Asm Info --------------------*- C++ -*--===//
+//===-- M68kMCAsmInfo.h - M68k Asm Info -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -28,4 +28,4 @@ public:
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_MCTARGETDESC_M68KMCASMINFO_H
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
index 9708abaadf98..9227bd6c3a78 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- M68kMCCodeEmitter.cpp - Convert M68k code emitter ---*- C++ -*-===//
+//===-- M68kMCCodeEmitter.cpp - Convert M68k code emitter -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h
index 242a1297206a..aba705aa54b6 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h
@@ -1,4 +1,4 @@
-//===-- M68kMCCodeEmitter.h - M68k Code Emitter ----------------*- C++ -*--===//
+//===-- M68kMCCodeEmitter.h - M68k Code Emitter -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -25,4 +25,4 @@ const uint8_t *getMCInstrBeads(unsigned);
} // namespace M68k
} // namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_M68K_MCTARGETDESC_M68KMCCODEEMITTER_H
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
index 9f4db895a821..2606e22410fc 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- M68kMCTargetDesc.cpp - M68k Target Descriptions -----*- C++ -*-===//
+//===-- M68kMCTargetDesc.cpp - M68k Target Descriptions ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
index a0ebca0ce36c..aa53e13af4fc 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- M68kMCTargetDesc.h - M68k Target Descriptions -------*- C++ -*-===//
+//===-- M68kMCTargetDesc.h - M68k Target Descriptions -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -58,4 +58,4 @@ std::unique_ptr<MCObjectTargetWriter> createM68kELFObjectWriter(uint8_t OSABI);
#define GET_SUBTARGETINFO_ENUM
#include "M68kGenSubtargetInfo.inc"
-#endif
+#endif // LLVM_LIB_TARGET_M68K_MCTARGETDESC_M68KMCTARGETDESC_H
diff --git a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
index 2a225b8a43cd..4701f46b0298 100644
--- a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
+++ b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- M68kTargetInfo.cpp - M68k Target Implementation -----*- C++ -*-===//
+//===-- M68kTargetInfo.cpp - M68k Target Implementation ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index c1677baf52a7..13cba8b079a9 100644
--- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -114,13 +114,14 @@ class MSP430Operand : public MCParsedAsmOperand {
public:
MSP430Operand(StringRef Tok, SMLoc const &S)
- : Base(), Kind(k_Tok), Tok(Tok), Start(S), End(S) {}
+ : Kind(k_Tok), Tok(Tok), Start(S), End(S) {}
MSP430Operand(KindTy Kind, unsigned Reg, SMLoc const &S, SMLoc const &E)
- : Base(), Kind(Kind), Reg(Reg), Start(S), End(E) {}
+ : Kind(Kind), Reg(Reg), Start(S), End(E) {}
MSP430Operand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
- : Base(), Kind(k_Imm), Imm(Imm), Start(S), End(E) {}
- MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S, SMLoc const &E)
- : Base(), Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {}
+ : Kind(k_Imm), Imm(Imm), Start(S), End(E) {}
+ MSP430Operand(unsigned Reg, MCExpr const *Expr, SMLoc const &S,
+ SMLoc const &E)
+ : Kind(k_Mem), Mem({Reg, Expr}), Start(S), End(E) {}
void addRegOperands(MCInst &Inst, unsigned N) const {
assert((Kind == k_Reg || Kind == k_IndReg || Kind == k_PostIndReg) &&
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
index 0cdd1f4f701f..bb5351af6523 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -9,7 +9,6 @@
#include "MCTargetDesc/MSP430FixupKinds.h"
#include "MCTargetDesc/MSP430MCTargetDesc.h"
-#include "MCTargetDesc/MSP430MCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index 4ef9a567d453..6a8dc3502496 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -190,7 +190,7 @@ bool MSP430FrameLowering::spillCalleeSavedRegisters(
MFI->setCalleeSavedFrameSize(CSI.size() * 2);
for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index c64a44a0ef95..aebfc6b0ae2e 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -705,7 +705,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
if (Ins[i].Flags.isSRet()) {
- unsigned Reg = FuncInfo->getSRetReturnReg();
+ Register Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(
getRegClassFor(MVT::i16));
@@ -772,7 +772,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (MF.getFunction().hasStructRetAttr()) {
MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
- unsigned Reg = FuncInfo->getSRetReturnReg();
+ Register Reg = FuncInfo->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in entry block");
@@ -1402,12 +1402,12 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
bool MSP430TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
// MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
- return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
+ return false && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16);
}
bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
// MSP430 implicitly zero-extends 8-bit results in 16-bit registers.
- return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
+ return false && VT1 == MVT::i8 && VT2 == MVT::i16;
}
bool MSP430TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index 2fd58717c4db..0604d47597e2 100644
--- a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -57,5 +57,5 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(),
+ : MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 01b5dff2e448..736c41f8ac03 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -827,8 +827,7 @@ private:
} Kind;
public:
- MipsOperand(KindTy K, MipsAsmParser &Parser)
- : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {}
+ MipsOperand(KindTy K, MipsAsmParser &Parser) : Kind(K), AsmParser(Parser) {}
~MipsOperand() override {
switch (Kind) {
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index bfe413a152b6..a3dbe6f84a1e 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -197,7 +197,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 26-bit signed immediate.
if (!isInt<26>(Value)) {
- Ctx.reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
+ Ctx.reportError(Fixup.getLoc(), "out of range PC26 fixup");
return 0;
}
break;
diff --git a/llvm/lib/Target/Mips/Mips.h b/llvm/lib/Target/Mips/Mips.h
index b3faaab436f0..faf58545db62 100644
--- a/llvm/lib/Target/Mips/Mips.h
+++ b/llvm/lib/Target/Mips/Mips.h
@@ -38,6 +38,7 @@ namespace llvm {
FunctionPass *createMicroMipsSizeReducePass();
FunctionPass *createMipsExpandPseudoPass();
FunctionPass *createMipsPreLegalizeCombiner();
+ FunctionPass *createMipsMulMulBugPass();
InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
MipsSubtarget &,
@@ -47,6 +48,7 @@ namespace llvm {
void initializeMipsBranchExpansionPass(PassRegistry &);
void initializeMicroMipsSizeReducePass(PassRegistry &);
void initializeMipsPreLegalizerCombinerPass(PassRegistry&);
+ void initializeMipsMulMulBugFixPass(PassRegistry&);
} // end namespace llvm;
#endif
diff --git a/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index 622f2039f9e4..4f4e3f3f2ed7 100644
--- a/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -74,7 +74,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
for (const CalleeSavedInfo &I : CSI) {
int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
unsigned DReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DReg, Offset));
@@ -124,7 +124,7 @@ bool Mips16FrameLowering::spillCalleeSavedRegisters(
// method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA)
&& MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
diff --git a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index ddd28d095e51..50147c019bfd 100644
--- a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -121,7 +121,7 @@ bool Mips16DAGToDAGISel::selectAddr(bool SPAllowed, SDValue Addr, SDValue &Base,
}
// Addresses of the form FI+const or FI|const
if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ auto *CN = cast<ConstantSDNode>(Addr.getOperand(1));
if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (SPAllowed) {
diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 136612c59d96..78ffe00c020c 100644
--- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -451,7 +451,7 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// So for now we always save S2. The optimization will be done
// in a follow-on patch.
//
- if (1 || (Signature->RetSig != Mips16HardFloatInfo::NoFPRet))
+ if (true || (Signature->RetSig != Mips16HardFloatInfo::NoFPRet))
FuncInfo->setSaveS2();
}
// one more look at list of intrinsics
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 3403ec01aef2..02d0e770ba66 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -190,7 +190,7 @@ static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
// method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
- unsigned Reg = CSI[e-i-1].getReg();
+ Register Reg = CSI[e-i-1].getReg();
switch (Reg) {
case Mips::RA:
case Mips::S0:
diff --git a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
index f6f43da9abf8..563118dfe627 100644
--- a/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-registerinfo"
-Mips16RegisterInfo::Mips16RegisterInfo() : MipsRegisterInfo() {}
+Mips16RegisterInfo::Mips16RegisterInfo() {}
bool Mips16RegisterInfo::requiresRegisterScavenging
(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index ae2b83c414db..33da0ff31be8 100644
--- a/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -25,8 +25,8 @@ void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
return;
}
- for (InstSeqLs::iterator Iter = SeqLs.begin(); Iter != SeqLs.end(); ++Iter)
- Iter->push_back(I);
+ for (auto &S : SeqLs)
+ S.push_back(I);
}
void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize,
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 5d026785b921..4bd8845e9cb9 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -80,13 +80,9 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
MipsFI = MF.getInfo<MipsFunctionInfo>();
if (Subtarget->inMips16Mode())
- for (std::map<
- const char *,
- const Mips16HardFloatInfo::FuncSignature *>::const_iterator
- it = MipsFI->StubsNeeded.begin();
- it != MipsFI->StubsNeeded.end(); ++it) {
- const char *Symbol = it->first;
- const Mips16HardFloatInfo::FuncSignature *Signature = it->second;
+ for (const auto &I : MipsFI->StubsNeeded) {
+ const char *Symbol = I.first;
+ const Mips16HardFloatInfo::FuncSignature *Signature = I.second;
if (StubsNeeded.find(Symbol) == StubsNeeded.end())
StubsNeeded[Symbol] = Signature;
}
@@ -341,7 +337,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
unsigned CSFPRegsSize = 0;
for (const auto &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
unsigned RegNum = TRI->getEncodingValue(Reg);
// If it's a floating point register, set the FPU Bitmask.
@@ -1279,11 +1275,11 @@ void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
// Align all blocks that are jumped to through jump table.
if (MachineJumpTableInfo *JtInfo = MF.getJumpTableInfo()) {
const std::vector<MachineJumpTableEntry> &JT = JtInfo->getJumpTables();
- for (unsigned I = 0; I < JT.size(); ++I) {
- const std::vector<MachineBasicBlock*> &MBBs = JT[I].MBBs;
+ for (const auto &I : JT) {
+ const std::vector<MachineBasicBlock *> &MBBs = I.MBBs;
- for (unsigned J = 0; J < MBBs.size(); ++J)
- MBBs[J]->setAlignment(MIPS_NACL_BUNDLE_ALIGN);
+ for (MachineBasicBlock *MBB : MBBs)
+ MBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN);
}
}
diff --git a/llvm/lib/Target/Mips/MipsCallLowering.h b/llvm/lib/Target/Mips/MipsCallLowering.h
index 1d1406da3201..9f114d55db4c 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.h
+++ b/llvm/lib/Target/Mips/MipsCallLowering.h
@@ -18,7 +18,6 @@
namespace llvm {
-class MachineMemOperand;
class MipsTargetLowering;
class MipsCallLowering : public CallLowering {
diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 491d379bfe0b..1efbf5570287 100644
--- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -604,9 +604,9 @@ MipsConstantIslands::CPEntry
std::vector<CPEntry> &CPEs = CPEntries[CPI];
// Number of entries per constpool index should be small, just do a
// linear search.
- for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
- if (CPEs[i].CPEMI == CPEMI)
- return &CPEs[i];
+ for (CPEntry &CPE : CPEs) {
+ if (CPE.CPEMI == CPEMI)
+ return &CPE;
}
return nullptr;
}
@@ -1052,27 +1052,27 @@ int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
// No. Look for previously created clones of the CPE that are in range.
unsigned CPI = CPEMI->getOperand(1).getIndex();
std::vector<CPEntry> &CPEs = CPEntries[CPI];
- for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ for (CPEntry &CPE : CPEs) {
// We already tried this one
- if (CPEs[i].CPEMI == CPEMI)
+ if (CPE.CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries, skip
- if (CPEs[i].CPEMI == nullptr)
+ if (CPE.CPEMI == nullptr)
continue;
- if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
- U.NegOk)) {
- LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
- << CPEs[i].CPI << "\n");
+ if (isCPEntryInRange(UserMI, UserOffset, CPE.CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPE.CPI
+ << "\n");
// Point the CPUser node to the replacement
- U.CPEMI = CPEs[i].CPEMI;
+ U.CPEMI = CPE.CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
for (MachineOperand &MO : UserMI->operands())
if (MO.isCPI()) {
- MO.setIndex(CPEs[i].CPI);
+ MO.setIndex(CPE.CPI);
break;
}
// Adjust the refcount of the clone...
- CPEs[i].RefCount++;
+ CPE.RefCount++;
// ...and the original. If we didn't remove the old entry, none of the
// addresses changed, so we don't need another pass.
return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
@@ -1108,27 +1108,27 @@ int MipsConstantIslands::findLongFormInRangeCPEntry
// No. Look for previously created clones of the CPE that are in range.
unsigned CPI = CPEMI->getOperand(1).getIndex();
std::vector<CPEntry> &CPEs = CPEntries[CPI];
- for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ for (CPEntry &CPE : CPEs) {
// We already tried this one
- if (CPEs[i].CPEMI == CPEMI)
+ if (CPE.CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries, skip
- if (CPEs[i].CPEMI == nullptr)
+ if (CPE.CPEMI == nullptr)
continue;
- if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI,
- U.getLongFormMaxDisp(), U.NegOk)) {
- LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
- << CPEs[i].CPI << "\n");
+ if (isCPEntryInRange(UserMI, UserOffset, CPE.CPEMI, U.getLongFormMaxDisp(),
+ U.NegOk)) {
+ LLVM_DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPE.CPI
+ << "\n");
// Point the CPUser node to the replacement
- U.CPEMI = CPEs[i].CPEMI;
+ U.CPEMI = CPE.CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
for (MachineOperand &MO : UserMI->operands())
if (MO.isCPI()) {
- MO.setIndex(CPEs[i].CPI);
+ MO.setIndex(CPE.CPI);
break;
}
// Adjust the refcount of the clone...
- CPEs[i].RefCount++;
+ CPE.RefCount++;
// ...and the original. If we didn't remove the old entry, none of the
// addresses changed, so we don't need another pass.
return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
@@ -1435,15 +1435,14 @@ void MipsConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
/// are zero.
bool MipsConstantIslands::removeUnusedCPEntries() {
unsigned MadeChange = false;
- for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
- std::vector<CPEntry> &CPEs = CPEntries[i];
- for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
- if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
- removeDeadCPEMI(CPEs[j].CPEMI);
- CPEs[j].CPEMI = nullptr;
- MadeChange = true;
- }
+ for (std::vector<CPEntry> &CPEs : CPEntries) {
+ for (CPEntry &CPE : CPEs) {
+ if (CPE.RefCount == 0 && CPE.CPEMI) {
+ removeDeadCPEMI(CPE.CPEMI);
+ CPE.CPEMI = nullptr;
+ MadeChange = true;
}
+ }
}
return MadeChange;
}
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 2d27d7553de6..cf6cec22308c 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -309,12 +309,12 @@ INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE,
static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
MachineFunction *MF = Filler->getParent()->getParent();
- for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) {
- if (I->second) {
- MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler));
+ for (const auto &I : BrMap) {
+ if (I.second) {
+ MIBundleBuilder(I.second).append(MF->CloneMachineInstr(&*Filler));
++UsefulSlots;
} else {
- I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler));
+ I.first->push_back(MF->CloneMachineInstr(&*Filler));
}
}
}
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 05c1c06ffefe..6ddfec5d0f79 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -313,7 +313,7 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
llvm_unreachable("unexpected opcode");
}
- unsigned LHSReg = getRegForValue(LHS);
+ Register LHSReg = getRegForValue(LHS);
if (!LHSReg)
return 0;
@@ -325,7 +325,7 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
if (!RHSReg)
return 0;
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Register ResultReg = createResultReg(&Mips::GPR32RegClass);
if (!ResultReg)
return 0;
@@ -341,7 +341,7 @@ unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Register ResultReg = createResultReg(&Mips::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LEA_ADDiu),
ResultReg)
.addFrameIndex(SI->second)
@@ -362,7 +362,7 @@ unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
unsigned MipsFastISel::materialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
if (isInt<16>(Imm)) {
unsigned Opc = Mips::ADDiu;
@@ -376,7 +376,7 @@ unsigned MipsFastISel::materialize32BitInt(int64_t Imm,
unsigned Hi = (Imm >> 16) & 0xFFFF;
if (Lo) {
// Both Lo and Hi have nonzero bits.
- unsigned TmpReg = createResultReg(RC);
+ Register TmpReg = createResultReg(RC);
emitInst(Mips::LUi, TmpReg).addImm(Hi);
emitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo);
} else {
@@ -391,13 +391,13 @@ unsigned MipsFastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
if (VT == MVT::f32) {
const TargetRegisterClass *RC = &Mips::FGR32RegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
unsigned TempReg = materialize32BitInt(Imm, &Mips::GPR32RegClass);
emitInst(Mips::MTC1, DestReg).addReg(TempReg);
return DestReg;
} else if (VT == MVT::f64) {
const TargetRegisterClass *RC = &Mips::AFGR64RegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
unsigned TempReg1 = materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass);
unsigned TempReg2 =
materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass);
@@ -412,7 +412,7 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
if (VT != MVT::i32)
return 0;
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
bool IsThreadLocal = GVar && GVar->isThreadLocal();
// TLS not supported at this time.
@@ -423,7 +423,7 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
.addGlobalAddress(GV, 0, MipsII::MO_GOT);
if ((GV->hasInternalLinkage() ||
(GV->hasLocalLinkage() && !isa<Function>(GV)))) {
- unsigned TempReg = createResultReg(RC);
+ Register TempReg = createResultReg(RC);
emitInst(Mips::ADDiu, TempReg)
.addReg(DestReg)
.addGlobalAddress(GV, 0, MipsII::MO_ABS_LO);
@@ -434,7 +434,7 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
emitInst(Mips::LW, DestReg)
.addReg(MFI->getGlobalBaseReg(*MF))
.addSym(Sym, MipsII::MO_GOT);
@@ -649,13 +649,13 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
default:
return false;
case CmpInst::ICMP_EQ: {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
emitInst(Mips::SLTiu, ResultReg).addReg(TempReg).addImm(1);
break;
}
case CmpInst::ICMP_NE: {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg);
emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg);
break;
@@ -667,13 +667,13 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg);
break;
case CmpInst::ICMP_UGE: {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg);
emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
break;
}
case CmpInst::ICMP_ULE: {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLTu, TempReg).addReg(RightReg).addReg(LeftReg);
emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
break;
@@ -685,13 +685,13 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg);
break;
case CmpInst::ICMP_SGE: {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg);
emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
break;
}
case CmpInst::ICMP_SLE: {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLT, TempReg).addReg(RightReg).addReg(LeftReg);
emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
break;
@@ -737,8 +737,8 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
default:
llvm_unreachable("Only switching of a subset of CCs.");
}
- unsigned RegWithZero = createResultReg(&Mips::GPR32RegClass);
- unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass);
+ Register RegWithZero = createResultReg(&Mips::GPR32RegClass);
+ Register RegWithOne = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0);
emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1);
emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg)
@@ -964,7 +964,7 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
// For the general case, we need to mask with 1.
if (ZExtCondReg == 0) {
- unsigned CondReg = getRegForValue(BI->getCondition());
+ Register CondReg = getRegForValue(BI->getCondition());
if (CondReg == 0)
return false;
@@ -982,7 +982,7 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
bool MipsFastISel::selectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Register ResultReg = createResultReg(&Mips::GPR32RegClass);
if (!emitCmp(ResultReg, CI))
return false;
updateValueMap(I, ResultReg);
@@ -1000,13 +1000,13 @@ bool MipsFastISel::selectFPExt(const Instruction *I) {
if (SrcVT != MVT::f32 || DestVT != MVT::f64)
return false;
- unsigned SrcReg =
+ Register SrcReg =
getRegForValue(Src); // this must be a 32bit floating point register class
// maybe we should handle this differently
if (!SrcReg)
return false;
- unsigned DestReg = createResultReg(&Mips::AFGR64RegClass);
+ Register DestReg = createResultReg(&Mips::AFGR64RegClass);
emitInst(Mips::CVT_D32_S, DestReg).addReg(SrcReg);
updateValueMap(I, DestReg);
return true;
@@ -1041,22 +1041,22 @@ bool MipsFastISel::selectSelect(const Instruction *I) {
const SelectInst *SI = cast<SelectInst>(I);
const Value *Cond = SI->getCondition();
- unsigned Src1Reg = getRegForValue(SI->getTrueValue());
- unsigned Src2Reg = getRegForValue(SI->getFalseValue());
- unsigned CondReg = getRegForValue(Cond);
+ Register Src1Reg = getRegForValue(SI->getTrueValue());
+ Register Src2Reg = getRegForValue(SI->getFalseValue());
+ Register CondReg = getRegForValue(Cond);
if (!Src1Reg || !Src2Reg || !CondReg)
return false;
- unsigned ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
+ Register ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
if (!ZExtCondReg)
return false;
if (!emitIntExt(MVT::i1, CondReg, MVT::i32, ZExtCondReg, true))
return false;
- unsigned ResultReg = createResultReg(RC);
- unsigned TempReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
+ Register TempReg = createResultReg(RC);
if (!ResultReg || !TempReg)
return false;
@@ -1079,11 +1079,11 @@ bool MipsFastISel::selectFPTrunc(const Instruction *I) {
if (SrcVT != MVT::f64 || DestVT != MVT::f32)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
- unsigned DestReg = createResultReg(&Mips::FGR32RegClass);
+ Register DestReg = createResultReg(&Mips::FGR32RegClass);
if (!DestReg)
return false;
@@ -1115,14 +1115,14 @@ bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) {
if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (SrcReg == 0)
return false;
// Determine the opcode for the conversion, which takes place
// entirely within FPRs.
- unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
- unsigned TempReg = createResultReg(&Mips::FGR32RegClass);
+ Register DestReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::FGR32RegClass);
unsigned Opc = (SrcVT == MVT::f32) ? Mips::TRUNC_W_S : Mips::TRUNC_W_D32;
// Generate the convert.
@@ -1196,7 +1196,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
break;
}
}
- unsigned ArgReg = getRegForValue(ArgVal);
+ Register ArgReg = getRegForValue(ArgVal);
if (!ArgReg)
return false;
@@ -1294,7 +1294,7 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
CopyVT = MVT::i32;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
if (!ResultReg)
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1462,11 +1462,11 @@ bool MipsFastISel::fastLowerArguments() {
for (const auto &FormalArg : F->args()) {
unsigned ArgNo = FormalArg.getArgNo();
unsigned SrcReg = Allocation[ArgNo].Reg;
- unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[ArgNo].RC);
+ Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[ArgNo].RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
// Without this, EmitLiveInCopies may eliminate the livein if its only
// use is a bitcast (which isn't turned into an instruction).
- unsigned ResultReg = createResultReg(Allocation[ArgNo].RC);
+ Register ResultReg = createResultReg(Allocation[ArgNo].RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(DstReg, getKillRegState(true));
@@ -1594,10 +1594,10 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!isTypeSupported(RetTy, VT))
return false;
- unsigned SrcReg = getRegForValue(II->getOperand(0));
+ Register SrcReg = getRegForValue(II->getOperand(0));
if (SrcReg == 0)
return false;
- unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ Register DestReg = createResultReg(&Mips::GPR32RegClass);
if (DestReg == 0)
return false;
if (VT == MVT::i16) {
@@ -1607,9 +1607,9 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return true;
} else {
unsigned TempReg[3];
- for (int i = 0; i < 3; i++) {
- TempReg[i] = createResultReg(&Mips::GPR32RegClass);
- if (TempReg[i] == 0)
+ for (unsigned &R : TempReg) {
+ R = createResultReg(&Mips::GPR32RegClass);
+ if (R == 0)
return false;
}
emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8);
@@ -1621,16 +1621,16 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
} else if (VT == MVT::i32) {
if (Subtarget->hasMips32r2()) {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::WSBH, TempReg).addReg(SrcReg);
emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16);
updateValueMap(II, DestReg);
return true;
} else {
unsigned TempReg[8];
- for (int i = 0; i < 8; i++) {
- TempReg[i] = createResultReg(&Mips::GPR32RegClass);
- if (TempReg[i] == 0)
+ for (unsigned &R : TempReg) {
+ R = createResultReg(&Mips::GPR32RegClass);
+ if (R == 0)
return false;
}
@@ -1720,7 +1720,7 @@ bool MipsFastISel::selectRet(const Instruction *I) {
if (!VA.isRegLoc())
return false;
- unsigned Reg = getRegForValue(RV);
+ Register Reg = getRegForValue(RV);
if (Reg == 0)
return false;
@@ -1788,7 +1788,7 @@ bool MipsFastISel::selectTrunc(const Instruction *I) {
if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
- unsigned SrcReg = getRegForValue(Op);
+ Register SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
@@ -1804,7 +1804,7 @@ bool MipsFastISel::selectIntExt(const Instruction *I) {
Type *SrcTy = Src->getType();
bool isZExt = isa<ZExtInst>(I);
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
@@ -1818,7 +1818,7 @@ bool MipsFastISel::selectIntExt(const Instruction *I) {
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Register ResultReg = createResultReg(&Mips::GPR32RegClass);
if (!emitIntExt(SrcVT, SrcReg, DestVT, ResultReg, isZExt))
return false;
@@ -1839,7 +1839,7 @@ bool MipsFastISel::emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT,
ShiftAmt = 16;
break;
}
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLL, TempReg).addReg(SrcReg).addImm(ShiftAmt);
emitInst(Mips::SRA, DestReg).addReg(TempReg).addImm(ShiftAmt);
return true;
@@ -1935,15 +1935,15 @@ bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) {
break;
}
- unsigned Src0Reg = getRegForValue(I->getOperand(0));
- unsigned Src1Reg = getRegForValue(I->getOperand(1));
+ Register Src0Reg = getRegForValue(I->getOperand(0));
+ Register Src1Reg = getRegForValue(I->getOperand(1));
if (!Src0Reg || !Src1Reg)
return false;
emitInst(DivOpc).addReg(Src0Reg).addReg(Src1Reg);
emitInst(Mips::TEQ).addReg(Src1Reg).addReg(Mips::ZERO).addImm(7);
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Register ResultReg = createResultReg(&Mips::GPR32RegClass);
if (!ResultReg)
return false;
@@ -1962,19 +1962,19 @@ bool MipsFastISel::selectShift(const Instruction *I) {
if (!isTypeSupported(I->getType(), RetVT))
return false;
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Register ResultReg = createResultReg(&Mips::GPR32RegClass);
if (!ResultReg)
return false;
unsigned Opcode = I->getOpcode();
const Value *Op0 = I->getOperand(0);
- unsigned Op0Reg = getRegForValue(Op0);
+ Register Op0Reg = getRegForValue(Op0);
if (!Op0Reg)
return false;
// If AShr or LShr, then we need to make sure the operand0 is sign extended.
if (Opcode == Instruction::AShr || Opcode == Instruction::LShr) {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
if (!TempReg)
return false;
@@ -2008,7 +2008,7 @@ bool MipsFastISel::selectShift(const Instruction *I) {
return true;
}
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ Register Op1Reg = getRegForValue(I->getOperand(1));
if (!Op1Reg)
return false;
@@ -2091,7 +2091,7 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
bool IsUnsigned) {
- unsigned VReg = getRegForValue(V);
+ Register VReg = getRegForValue(V);
if (VReg == 0)
return 0;
MVT VMVT = TLI.getValueType(DL, V->getType(), true).getSimpleVT();
@@ -2100,7 +2100,7 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
return 0;
if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) {
- unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+ Register TempReg = createResultReg(&Mips::GPR32RegClass);
if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned))
return 0;
VReg = TempReg;
@@ -2112,7 +2112,7 @@ void MipsFastISel::simplifyAddress(Address &Addr) {
if (!isInt<16>(Addr.getOffset())) {
unsigned TempReg =
materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass);
- unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+ Register DestReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::ADDu, DestReg).addReg(TempReg).addReg(Addr.getReg());
Addr.setReg(DestReg);
Addr.setOffset(0);
@@ -2129,7 +2129,7 @@ unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
// followed by another instruction that defines the same registers too.
// We can fix this by explicitly marking those registers as dead.
if (MachineInstOpcode == Mips::MUL) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 9377e83524e1..0c2e129b8f1f 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -2523,7 +2523,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
MFI.setReturnAddressIsTaken(true);
// Return RA, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT));
+ Register Reg = MF.addLiveIn(RA, getRegClassFor(VT));
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT);
}
@@ -3051,17 +3051,15 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// stuck together.
SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
+ for (auto &R : RegsToPass) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, R.first, R.second, InFlag);
InFlag = Chain.getValue(1);
}
// Add argument registers to the end of the list so that they are
// known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+ for (auto &R : RegsToPass)
+ Ops.push_back(CLI.DAG.getRegister(R.first, R.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 6d44ce2ab563..59f158688b16 100644
--- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -80,8 +80,8 @@ private:
MipsInstructionSelector::MipsInstructionSelector(
const MipsTargetMachine &TM, const MipsSubtarget &STI,
const MipsRegisterBankInfo &RBI)
- : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI),
+ : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
+ RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "MipsGenGlobalISel.inc"
diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index a7a2be30f58a..411a26e42713 100644
--- a/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -148,14 +148,14 @@ void MipsFunctionInfo::initGlobalBaseReg(MachineFunction &MF) {
void MipsFunctionInfo::createEhDataRegsFI(MachineFunction &MF) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- for (int I = 0; I < 4; ++I) {
+ for (int &I : EhDataRegFI) {
const TargetRegisterClass &RC =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64()
? Mips::GPR64RegClass
: Mips::GPR32RegClass;
- EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(
- TRI.getSpillSize(RC), TRI.getSpillAlign(RC), false);
+ I = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlign(RC), false);
}
}
@@ -167,9 +167,9 @@ void MipsFunctionInfo::createISRRegFI(MachineFunction &MF) {
const TargetRegisterClass &RC = Mips::GPR32RegClass;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- for (int I = 0; I < 2; ++I)
- ISRDataRegFI[I] = MF.getFrameInfo().CreateStackObject(
- TRI.getSpillSize(RC), TRI.getSpillAlign(RC), false);
+ for (int &I : ISRDataRegFI)
+ I = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlign(RC), false);
}
bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
diff --git a/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
new file mode 100644
index 000000000000..daaf1135c2b1
--- /dev/null
+++ b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
@@ -0,0 +1,136 @@
+//===- MipsMulMulBugPass.cpp - Mips VR4300 mulmul bugfix pass -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Early revisions of the VR4300 have a hardware bug where two consecutive
+// multiplications can produce an incorrect result in the second multiply.
+//
+// This pass scans for mul instructions in each basic block and inserts
+// a nop whenever the following conditions are met:
+//
+// - The current instruction is a single or double-precision floating-point
+// mul instruction.
+// - The next instruction is either a mul instruction (any kind)
+// or a branch instruction.
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "mips-vr4300-mulmul-fix"
+
+using namespace llvm;
+
+namespace {
+
+class MipsMulMulBugFix : public MachineFunctionPass {
+public:
+ MipsMulMulBugFix() : MachineFunctionPass(ID) {
+ initializeMipsMulMulBugFixPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Mips VR4300 mulmul bugfix"; }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+private:
+ bool fixMulMulBB(MachineBasicBlock &MBB, const MipsInstrInfo &MipsII);
+};
+
+} // namespace
+
+INITIALIZE_PASS(MipsMulMulBugFix, "mips-vr4300-mulmul-fix",
+ "Mips VR4300 mulmul bugfix", false, false)
+
+char MipsMulMulBugFix::ID = 0;
+
+bool MipsMulMulBugFix::runOnMachineFunction(MachineFunction &MF) {
+ const MipsInstrInfo &MipsII =
+ *static_cast<const MipsInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+
+ for (auto &MBB : MF)
+ Modified |= fixMulMulBB(MBB, MipsII);
+
+ return Modified;
+}
+
+static bool isFirstMul(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case Mips::FMUL_S:
+ case Mips::FMUL_D:
+ case Mips::FMUL_D32:
+ case Mips::FMUL_D64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool isSecondMulOrBranch(const MachineInstr &MI) {
+ if (MI.isBranch() || MI.isIndirectBranch() || MI.isCall())
+ return true;
+
+ switch (MI.getOpcode()) {
+ case Mips::MUL:
+ case Mips::FMUL_S:
+ case Mips::FMUL_D:
+ case Mips::FMUL_D32:
+ case Mips::FMUL_D64:
+ case Mips::MULT:
+ case Mips::MULTu:
+ case Mips::DMULT:
+ case Mips::DMULTu:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool MipsMulMulBugFix::fixMulMulBB(MachineBasicBlock &MBB,
+ const MipsInstrInfo &MipsII) {
+ bool Modified = false;
+
+ MachineBasicBlock::instr_iterator NextMII;
+
+ // Iterate through the instructions in the basic block
+ for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+ E = MBB.instr_end();
+ MII != E; MII = NextMII) {
+
+ NextMII = next_nodbg(MII, E);
+
+ // Trigger when the current instruction is a mul and the next instruction
+ // is either a mul or a branch, in case the branch target starts with a mul.
+ if (NextMII != E && isFirstMul(*MII) && isSecondMulOrBranch(*NextMII)) {
+ LLVM_DEBUG(dbgs() << "Found mulmul!\n");
+
+ const MCInstrDesc &NewMCID = MipsII.get(Mips::NOP);
+ BuildMI(MBB, NextMII, DebugLoc(), NewMCID);
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMipsMulMulBugPass() { return new MipsMulMulBugFix(); }
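
The fix above boils down to a pairwise scan of each block: whenever a floating-point mul is immediately followed by another mul or by a control-flow change, a nop is placed between them. Below is a minimal standalone sketch of that scan, assuming a plain opcode sequence and hypothetical Op values in place of the MachineInstr-based checks (isFirstMul / isSecondMulOrBranch) defined in the pass; it illustrates the pattern only, not the pass itself.

#include <cstddef>
#include <vector>

// Hypothetical stand-ins for the pass's opcode predicates; the real pass
// looks at MachineInstr opcodes (FMUL_S/FMUL_D*, MUL/MULT*/DMULT*, branches).
enum class Op { FMul, IMul, Branch, Nop, Other };

static bool isFirstMul(Op O) { return O == Op::FMul; }
static bool isSecondMulOrBranch(Op O) {
  return O == Op::FMul || O == Op::IMul || O == Op::Branch;
}

// Insert a nop between every qualifying pair, mirroring fixMulMulBB.
static bool fixMulMul(std::vector<Op> &Block) {
  bool Modified = false;
  for (std::size_t I = 0; I + 1 < Block.size(); ++I) {
    if (isFirstMul(Block[I]) && isSecondMulOrBranch(Block[I + 1])) {
      Block.insert(Block.begin() + I + 1, Op::Nop);
      Modified = true;
      ++I; // step over the nop we just inserted
    }
  }
  return Modified;
}

In the actual backend the pass only runs when the hidden -mfix4300 option (wired up in MipsTargetMachine.cpp further down) is passed.
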
diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 7cba3118cd62..390ab9d22024 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -159,8 +159,8 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
- for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
- Reserved.set(ReservedGPR32[I]);
+ for (MCPhysReg R : ReservedGPR32)
+ Reserved.set(R);
// Reserve registers for the NaCl sandbox.
if (Subtarget.isTargetNaCl()) {
@@ -169,8 +169,8 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::T8); // Reserved for thread pointer.
}
- for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I)
- Reserved.set(ReservedGPR64[I]);
+ for (MCPhysReg R : ReservedGPR64)
+ Reserved.set(R);
// For mno-abicalls, GP is a program invariant!
if (!Subtarget.isABICalls()) {
diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 193d071447ff..7ee2ddf3605f 100644
--- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -454,7 +454,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// directives.
for (const CalleeSavedInfo &I : CSI) {
int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
// If Reg is a double precision register, emit two cfa_offsets,
// one for each of the paired single precision registers.
@@ -801,7 +801,7 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters(
// method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
&& MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 40b215a8204c..346ebe9664fc 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -85,18 +85,18 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
if (Subtarget.hasDSP()) {
MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
- for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRRegClass);
+ for (const auto &VecTy : VecTys) {
+ addRegisterClass(VecTy, &Mips::DSPRRegClass);
// Expand all builtin opcodes.
for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
- setOperationAction(Opc, VecTys[i], Expand);
+ setOperationAction(Opc, VecTy, Expand);
- setOperationAction(ISD::ADD, VecTys[i], Legal);
- setOperationAction(ISD::SUB, VecTys[i], Legal);
- setOperationAction(ISD::LOAD, VecTys[i], Legal);
- setOperationAction(ISD::STORE, VecTys[i], Legal);
- setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ setOperationAction(ISD::ADD, VecTy, Legal);
+ setOperationAction(ISD::SUB, VecTy, Legal);
+ setOperationAction(ISD::LOAD, VecTy, Legal);
+ setOperationAction(ISD::STORE, VecTy, Legal);
+ setOperationAction(ISD::BITCAST, VecTy, Legal);
}
setTargetDAGCombine(ISD::SHL);
@@ -2931,7 +2931,7 @@ static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
// operand is unused and can be replaced with anything. We choose to replace it
// with the used operand since this reduces the number of instructions overall.
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
- SmallVector<int, 16> Indices,
+ const SmallVector<int, 16> &Indices,
SelectionDAG &DAG) {
SmallVector<SDValue, 16> Ops;
SDValue Op0;
@@ -2953,9 +2953,8 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
Using2ndVec = true;
}
- for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
- ++I)
- Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));
+ for (int Idx : Indices)
+ Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index b05e9ad827c4..d6481793ef49 100644
--- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-reg-info"
-MipsSERegisterInfo::MipsSERegisterInfo() : MipsRegisterInfo() {}
+MipsSERegisterInfo::MipsSERegisterInfo() {}
bool MipsSERegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 8de3c9fd25bd..f9f662a00117 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -45,6 +45,10 @@ using namespace llvm;
#define DEBUG_TYPE "mips"
+static cl::opt<bool>
+ EnableMulMulFix("mfix4300", cl::init(false),
+ cl::desc("Enable the VR4300 mulmul bug fix."), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
// Register the target.
RegisterTargetMachine<MipsebTargetMachine> X(getTheMipsTarget());
@@ -58,6 +62,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
initializeMipsBranchExpansionPass(*PR);
initializeMicroMipsSizeReducePass(*PR);
initializeMipsPreLegalizerCombinerPass(*PR);
+ initializeMipsMulMulBugFixPass(*PR);
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -292,6 +297,11 @@ void MipsPassConfig::addPreEmitPass() {
// instructions which can be remapped to a 16 bit instruction.
addPass(createMicroMipsSizeReducePass());
+ // This pass inserts a nop instruction between two back-to-back multiplication
+ // instructions when the "mfix4300" flag is passed.
+ if (EnableMulMulFix)
+ addPass(createMipsMulMulBugPass());
+
// The delay slot filler pass can potentially create forbidden slot hazards
// for MIPSR6 and therefore it should go before MipsBranchExpansion pass.
addPass(createMipsDelaySlotFillerPass());
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 82d332ab3f08..da0cbb32659c 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -108,6 +108,10 @@ void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
// SAT flag
if (Imm & NVPTX::PTXCvtMode::SAT_FLAG)
O << ".sat";
+ } else if (strcmp(Modifier, "relu") == 0) {
+ // RELU flag
+ if (Imm & NVPTX::PTXCvtMode::RELU_FLAG)
+ O << ".relu";
} else if (strcmp(Modifier, "base") == 0) {
// Default operand
switch (Imm & NVPTX::PTXCvtMode::BASE_MASK) {
@@ -139,6 +143,9 @@ void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
case NVPTX::PTXCvtMode::RP:
O << ".rp";
break;
+ case NVPTX::PTXCvtMode::RNA:
+ O << ".rna";
+ break;
}
} else {
llvm_unreachable("Invalid conversion modifier");
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index c2fd090da084..41e9f375e536 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -137,10 +137,12 @@ enum CvtMode {
RZ,
RM,
RP,
+ RNA,
BASE_MASK = 0x0F,
FTZ_FLAG = 0x10,
- SAT_FLAG = 0x20
+ SAT_FLAG = 0x20,
+ RELU_FLAG = 0x40
};
}
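
The conversion-mode immediate used by these cvt instructions is a small bitfield: the low nibble (BASE_MASK) selects the rounding mode and the high bits are independent FTZ/SAT/RELU flags, which is why the TableGen leaf CvtRN_RELU added further down is 0x45 (0x5 for .rn plus 0x40 for .relu). A minimal decoding sketch, assuming the enum values shown above and the PTX modifier spellings used by printCvtMode; the in-tree printer does this one modifier at a time rather than building the whole string.

#include <string>

// Mask and flag bits as declared in NVPTX::PTXCvtMode above.
constexpr unsigned BASE_MASK = 0x0F, FTZ_FLAG = 0x10, SAT_FLAG = 0x20,
                   RELU_FLAG = 0x40;

// Illustrative decode of a cvt-mode immediate into its PTX modifier string,
// e.g. decodeCvtMode(0x45) == ".rn.relu".
std::string decodeCvtMode(unsigned Imm) {
  static const char *Base[] = {"",    ".rni", ".rzi", ".rmi", ".rpi",
                               ".rn", ".rz",  ".rm",  ".rp",  ".rna"};
  std::string S;
  if ((Imm & BASE_MASK) < sizeof(Base) / sizeof(Base[0]))
    S += Base[Imm & BASE_MASK];
  if (Imm & FTZ_FLAG)
    S += ".ftz";
  if (Imm & SAT_FLAG)
    S += ".sat";
  if (Imm & RELU_FLAG)
    S += ".relu";
  return S;
}
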
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 16add48d4602..3a59306c4998 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1214,9 +1214,9 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
std::vector<const GlobalVariable *> &gvars = localDecls[f];
- for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
+ for (const GlobalVariable *GV : gvars) {
O << "\t// demoted variable\n\t";
- printModuleLevelGV(gvars[i], O, true);
+ printModuleLevelGV(GV, O, true);
}
}
@@ -1454,7 +1454,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
NVPTX::CUDA) {
- Type *ETy = PTy->getElementType();
+ Type *ETy = PTy->getPointerElementType();
int addrSpace = PTy->getAddressSpace();
switch (addrSpace) {
default:
@@ -1514,7 +1514,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// param has byVal attribute. So should be a pointer
auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
- Type *ETy = PTy->getElementType();
+ Type *ETy = PTy->getPointerElementType();
if (isABI || isKernelFunc) {
// Just print .param .align <a> .b8 .param[size];
@@ -1613,7 +1613,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
// We use the per class virtual register number in the ptx output.
unsigned int numVRs = MRI->getNumVirtRegs();
for (unsigned i = 0; i < numVRs; i++) {
- unsigned int vr = Register::index2VirtReg(i);
+ Register vr = Register::index2VirtReg(i);
const TargetRegisterClass *RC = MRI->getRegClass(vr);
DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
int n = regmap.size();
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index a9a5eae42c1d..888fc8ffac2c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -96,20 +96,18 @@ bool GenericToNVVM::runOnModule(Module &M) {
// Walk through the instructions in function definitions, and replace any use
// of original global variables in GVMap with a use of the corresponding
// copies in GVMap. If necessary, promote constants to instructions.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (I->isDeclaration()) {
+ for (Function &F : M) {
+ if (F.isDeclaration()) {
continue;
}
- IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg());
- for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE;
- ++BBI) {
- for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
- ++II) {
- for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) {
- Value *Operand = II->getOperand(i);
+ IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
+ for (BasicBlock &BB : F) {
+ for (Instruction &II : BB) {
+ for (unsigned i = 0, e = II.getNumOperands(); i < e; ++i) {
+ Value *Operand = II.getOperand(i);
if (isa<Constant>(Operand)) {
- II->setOperand(
- i, remapConstant(&M, &*I, cast<Constant>(Operand), Builder));
+ II.setOperand(
+ i, remapConstant(&M, &F, cast<Constant>(Operand), Builder));
}
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e2f6b69fc530..eac237bb27bb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -553,17 +553,30 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// These map to corresponding instructions for f32/f64. f16 must be
// promoted to f32. v2f16 is expanded to f16, which is then promoted
// to f32.
- for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS,
- ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) {
+ for (const auto &Op :
+ {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FABS}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setOperationAction(Op, MVT::v2f16, Expand);
}
- setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
- setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
- setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
+ // max.f16, max.f16x2 and max.NaN are supported on sm_80+.
+ auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
+ bool IsAtLeastSm80 = STI.getSmVersion() >= 80 && STI.getPTXVersion() >= 70;
+ return IsAtLeastSm80 ? Legal : NotSm80Action;
+ };
+ for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
+ setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
+ setOperationAction(Op, MVT::f32, Legal);
+ setOperationAction(Op, MVT::f64, Legal);
+ setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
+ }
+ for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
+ setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
+ setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
+ setOperationAction(Op, MVT::f64, GetMinMaxAction(Expand));
+ setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
+ }
// No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
// No FPOW or FREM in PTX.
@@ -1341,7 +1354,7 @@ std::string NVPTXTargetLowering::getPrototype(
}
auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
- Type *ETy = PTy->getElementType();
+ Type *ETy = PTy->getPointerElementType();
Align align = Outs[OIdx].Flags.getNonZeroByValAlign();
unsigned sz = DL.getTypeAllocSize(ETy);
@@ -1564,7 +1577,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<uint64_t, 16> Offsets;
auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DL, PTy->getPointerElementType(), VTs, &Offsets,
+ 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
@@ -2434,7 +2448,7 @@ static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
if (!context)
return false;
- auto *STy = dyn_cast<StructType>(PTy->getElementType());
+ auto *STy = dyn_cast<StructType>(PTy->getPointerElementType());
if (!STy || STy->isLiteral())
return false;
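
The min/max hunk above encodes a simple capability table: on sm_80 with PTX 7.0+, the f16/f16x2 min/max forms (including the NaN-propagating min.NaN/max.NaN) are native; otherwise scalar f16 fminnum/fmaxnum fall back to promotion to f32, while fminimum/fmaximum, which have no pre-sm_80 lowering, are expanded (v2f16 is expanded in both cases). A small standalone restatement of that gating for scalar f16, with Action as a stand-in for LegalizeAction and ignoring the extra fallback argument that setFP16OperationAction takes; a sketch of the decision only, not the lowering.

// Stand-in for the LegalizeAction choices used in NVPTXISelLowering above.
enum class Action { Legal, Promote, Expand };

// PropagatesNaN selects fminimum/fmaximum as opposed to fminnum/fmaxnum;
// mirrors GetMinMaxAction plus the per-opcode loops in the hunk above.
Action minMaxF16Action(unsigned SmVersion, unsigned PTXVersion,
                       bool PropagatesNaN) {
  bool IsAtLeastSm80 = SmVersion >= 80 && PTXVersion >= 70;
  if (IsAtLeastSm80)
    return Action::Legal;                 // native min/max.{f16,f16x2,NaN}
  return PropagatesNaN ? Action::Expand   // fminimum/fmaximum
                       : Action::Promote; // fminnum/fmaxnum -> f32
}
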
diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index fc0d5cc6fbfa..eeedce2d99cb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -57,12 +57,9 @@ bool NVPTXImageOptimizer::runOnFunction(Function &F) {
InstrToDelete.clear();
// Look for call instructions in the function
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;
- ++BI) {
- for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
- I != E; ++I) {
- Instruction &Instr = *I;
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &Instr : BB) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Instr)) {
Function *CalledF = CI->getCalledFunction();
if (CalledF && CalledF->isIntrinsic()) {
// This is an intrinsic function call, check if it's an istypep
@@ -84,8 +81,8 @@ bool NVPTXImageOptimizer::runOnFunction(Function &F) {
}
// Delete any istypep instances we replaced in the IR
- for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i)
- InstrToDelete[i]->eraseFromParent();
+ for (Instruction *I : InstrToDelete)
+ I->eraseFromParent();
return Changed;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 96386af569de..22e200e77831 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -48,6 +48,7 @@ def CvtRN : PatLeaf<(i32 0x5)>;
def CvtRZ : PatLeaf<(i32 0x6)>;
def CvtRM : PatLeaf<(i32 0x7)>;
def CvtRP : PatLeaf<(i32 0x8)>;
+def CvtRNA : PatLeaf<(i32 0x9)>;
def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
def CvtRNI_FTZ : PatLeaf<(i32 0x11)>;
@@ -62,6 +63,10 @@ def CvtRP_FTZ : PatLeaf<(i32 0x18)>;
def CvtSAT : PatLeaf<(i32 0x20)>;
def CvtSAT_FTZ : PatLeaf<(i32 0x30)>;
+def CvtNONE_RELU : PatLeaf<(i32 0x40)>;
+def CvtRN_RELU : PatLeaf<(i32 0x45)>;
+def CvtRZ_RELU : PatLeaf<(i32 0x46)>;
+
def CvtMode : Operand<i32> {
let PrintMethod = "printCvtMode";
}
@@ -249,6 +254,32 @@ multiclass F3<string OpcStr, SDNode OpNode> {
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+
+ def f16rr_ftz :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def f16rr :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math]>;
+
+ def f16x2rr_ftz :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def f16x2rr :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math]>;
}
// Template for instructions which take three FP args. The
@@ -500,6 +531,29 @@ let hasSideEffects = false in {
"cvt.s64.s16 \t$dst, $src;", []>;
def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"cvt.s64.s32 \t$dst, $src;", []>;
+
+multiclass CVT_FROM_FLOAT_SM80<string FromName, RegisterClass RC> {
+ def _f32 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Float32Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:relu}.",
+ FromName, ".f32 \t$dst, $src;"), []>,
+ Requires<[hasPTX70, hasSM80]>;
+ }
+
+ defm CVT_bf16 : CVT_FROM_FLOAT_SM80<"bf16", Int16Regs>;
+
+ multiclass CVT_FROM_FLOAT_V2_SM80<string FromName, RegisterClass RC> {
+ def _f32 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Float32Regs:$src1, Float32Regs:$src2, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:relu}.",
+ FromName, ".f32 \t$dst, $src1, $src2;"), []>,
+ Requires<[hasPTX70, hasSM80]>;
+ }
+
+ defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Float16x2Regs>;
+ defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>;
}
//-----------------------------------
@@ -842,6 +896,8 @@ defm FMUL : F3_fma_component<"mul", fmul>;
defm FMIN : F3<"min", fminnum>;
defm FMAX : F3<"max", fmaxnum>;
+defm FMINNAN : F3<"min.NaN", fminimum>;
+defm FMAXNAN : F3<"max.NaN", fmaximum>;
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 511cd875ac55..ec069a0a02ae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1046,6 +1046,38 @@ def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
(CVT_f32_u32 Int32Regs:$a, CvtRP)>;
+def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
+ (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
+def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
+ (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
+def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
+ (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
+def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
+ (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
+
+def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
+ (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
+def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
+ (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
+def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
+ (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
+def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
+ (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
+
+def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
+ (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
+def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
+ (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
+def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
+ (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
+def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
+ (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
+
+def CVT_tf32_f32 :
+ NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
+ "cvt.rna.tf32.f32 \t$dest, $a;",
+ [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
+
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 6cf59d285e8d..f655f25602bc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -66,10 +66,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// Collect all aggregate loads and mem* calls.
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->hasOneUse())
continue;
@@ -81,7 +80,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
continue;
AggrLoads.push_back(LI);
}
- } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
+ } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
// Convert intrinsic calls with variable size or with constant size
// larger than the MaxAggrCopySize threshold.
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index ddb7f097fe68..67aa49132016 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -233,7 +233,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
assert(PType && "Expecting pointer type in handleByValParam");
- Type *StructType = PType->getElementType();
+ Type *StructType = PType->getPointerElementType();
auto IsALoadChain = [&](Value *Start) {
SmallVector<Value *, 16> ValuesToCheck = {Start};
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 05c20369abf4..5a6440c91fca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -49,8 +49,8 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
- SmVersion(20), TM(TM), InstrInfo(),
- TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), FrameLowering() {}
+ SmVersion(20), TM(TM),
+ TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}
bool NVPTXSubtarget::hasImageHandles() const {
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 366d92a5a805..4645671a0cd8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -17,7 +17,7 @@ namespace llvm {
class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
public:
- NVPTXTargetObjectFile() : TargetLoweringObjectFile() {}
+ NVPTXTargetObjectFile() {}
~NVPTXTargetObjectFile() override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 74d129d330f3..2d6d72777db2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -286,8 +286,7 @@ bool getAlign(const Function &F, unsigned index, unsigned &align) {
bool retval = findAllNVVMAnnotation(&F, "align", Vs);
if (!retval)
return false;
- for (int i = 0, e = Vs.size(); i < e; i++) {
- unsigned v = Vs[i];
+ for (unsigned v : Vs) {
if ((v >> 16) == index) {
align = v & 0xFFFF;
return true;
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index ded922329ebf..715cff72dcab 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -121,6 +121,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseDirectiveMachine(SMLoc L);
bool ParseDirectiveAbiVersion(SMLoc L);
bool ParseDirectiveLocalEntry(SMLoc L);
+ bool ParseGNUAttribute(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
@@ -201,7 +202,8 @@ struct PPCOperand : public MCParsedAsmOperand {
struct TLSRegOp TLSReg;
};
- PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ PPCOperand(KindTy K) : Kind(K) {}
+
public:
PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
@@ -1604,6 +1606,8 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
ParseDirectiveAbiVersion(DirectiveID.getLoc());
else if (IDVal == ".localentry")
ParseDirectiveLocalEntry(DirectiveID.getLoc());
+ else if (IDVal.startswith(".gnu_attribute"))
+ ParseGNUAttribute(DirectiveID.getLoc());
else
return true;
return false;
@@ -1719,7 +1723,16 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
return false;
}
+bool PPCAsmParser::ParseGNUAttribute(SMLoc L) {
+ int64_t Tag;
+ int64_t IntegerValue;
+ if (!getParser().parseGNUAttribute(L, Tag, IntegerValue))
+ return false;
+
+ getParser().getStreamer().emitGNUAttribute(Tag, IntegerValue);
+ return true;
+}
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() {
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
index 7d64816ed6c7..0cd8350e3fdd 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -65,8 +65,7 @@ private:
PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM,
const PPCSubtarget &STI,
const PPCRegisterBankInfo &RBI)
- : InstructionSelector(), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI),
+ : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "PPCGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 0ca8587ba483..b92b0fc342ec 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -40,9 +40,8 @@ PPCELFStreamer::PPCELFStreamer(MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter)
- : MCELFStreamer(Context, std::move(MAB), std::move(OW),
- std::move(Emitter)), LastLabel(NULL) {
-}
+ : MCELFStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)),
+ LastLabel(nullptr) {}
void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI) {
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index d6e02d0d0862..a651362f703b 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -271,14 +271,14 @@ private:
MCAssembler &MCA = getStreamer().getAssembler();
int64_t Offset;
if (!LocalOffset->evaluateAsAbsolute(Offset, MCA))
- MCA.getContext().reportFatalError(
- LocalOffset->getLoc(), ".localentry expression must be absolute.");
+ MCA.getContext().reportError(LocalOffset->getLoc(),
+ ".localentry expression must be absolute");
switch (Offset) {
default:
- MCA.getContext().reportFatalError(
- LocalOffset->getLoc(),
- ".localentry expression is not a valid power of 2.");
+ MCA.getContext().reportError(
+ LocalOffset->getLoc(), ".localentry expression must be a power of 2");
+ return 0;
case 0:
return 0;
case 1:
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index f3ae0010ad8e..edd3b42d47e1 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -409,8 +409,8 @@ def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
(instrs
- HASHST,
- HASHSTP
+ HASHST, HASHST8,
+ HASHSTP, HASHSTP8
)>;
// 24 Cycles Decimal Floating Point operations, 1 input operands
@@ -619,6 +619,8 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
XSCMPEXPQP,
XSCMPOQP,
XSCMPUQP,
+ XSMAXCQP,
+ XSMINCQP,
XSTSTDCQP,
XXGENPCVBM
)>;
@@ -1336,8 +1338,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
(instrs
- HASHCHK,
- HASHCHKP
+ HASHCHK, HASHCHK8,
+ HASHCHKP, HASHCHKP8
)>;
// Single crack instructions
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index f7c049951c54..c088d7847ce4 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1415,7 +1415,7 @@ def : InstRW<[],
(instregex "NOP_GT_PWR(6|7)$"),
(instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
(instregex "WRTEE(I)?$"),
- (instregex "HASH(ST|STP|CHK|CHKP)$"),
+ (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
ATTN,
CLRBHRB,
MFBHRBE,
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f26c15667a0b..780981806996 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -109,6 +109,23 @@ struct DenseMapInfo<std::pair<const MCSymbol *, MCSymbolRefExpr::VariantKind>> {
namespace {
+enum {
+ // GNU attribute tags for PowerPC ABI
+ Tag_GNU_Power_ABI_FP = 4,
+ Tag_GNU_Power_ABI_Vector = 8,
+ Tag_GNU_Power_ABI_Struct_Return = 12,
+
+ // GNU attribute values for PowerPC float ABI, as combination of two parts
+ Val_GNU_Power_ABI_NoFloat = 0b00,
+ Val_GNU_Power_ABI_HardFloat_DP = 0b01,
+ Val_GNU_Power_ABI_SoftFloat_DP = 0b10,
+ Val_GNU_Power_ABI_HardFloat_SP = 0b11,
+
+ Val_GNU_Power_ABI_LDBL_IBM128 = 0b0100,
+ Val_GNU_Power_ABI_LDBL_64 = 0b1000,
+ Val_GNU_Power_ABI_LDBL_IEEE128 = 0b1100,
+};
+
class PPCAsmPrinter : public AsmPrinter {
protected:
// For TLS on AIX, we need to be able to identify TOC entries of specific
@@ -178,6 +195,8 @@ public:
return "Linux PPC Assembly Printer";
}
+ void emitGNUAttributes(Module &M);
+
void emitStartOfAsmFile(Module &M) override;
void emitEndOfAsmFile(Module &) override;
@@ -1388,6 +1407,28 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+void PPCLinuxAsmPrinter::emitGNUAttributes(Module &M) {
+ // Emit float ABI into GNU attribute
+ Metadata *MD = M.getModuleFlag("float-abi");
+ MDString *FloatABI = dyn_cast_or_null<MDString>(MD);
+ if (!FloatABI)
+ return;
+ StringRef flt = FloatABI->getString();
+ // TODO: Support emitting soft-fp and hard double/single attributes.
+ if (flt == "doubledouble")
+ OutStreamer->emitGNUAttribute(Tag_GNU_Power_ABI_FP,
+ Val_GNU_Power_ABI_HardFloat_DP |
+ Val_GNU_Power_ABI_LDBL_IBM128);
+ else if (flt == "ieeequad")
+ OutStreamer->emitGNUAttribute(Tag_GNU_Power_ABI_FP,
+ Val_GNU_Power_ABI_HardFloat_DP |
+ Val_GNU_Power_ABI_LDBL_IEEE128);
+ else if (flt == "ieeedouble")
+ OutStreamer->emitGNUAttribute(Tag_GNU_Power_ABI_FP,
+ Val_GNU_Power_ABI_HardFloat_DP |
+ Val_GNU_Power_ABI_LDBL_64);
+}
+
void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (!Subtarget->isPPC64())
return PPCAsmPrinter::emitInstruction(MI);
@@ -1642,6 +1683,8 @@ void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+ emitGNUAttributes(M);
+
if (!TOC.empty()) {
const char *Name = isPPC64 ? ".toc" : ".got2";
MCSectionELF *Section = OutContext.getELFSection(
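A minimal sketch, not part of the patch above, of how the two-part float ABI value is put together: the low two bits carry the scalar float ABI and the next two bits the long-double kind, so the three module flags handled by emitGNUAttributes() produce 5, 13 and 9 for Tag_GNU_Power_ABI_FP (rendered by the streamer as a .gnu_attribute directive in textual assembly output).

    // Standalone C++ check of the value composition used above.
    #include <cassert>

    enum {
      Val_GNU_Power_ABI_HardFloat_DP = 0b01,
      Val_GNU_Power_ABI_LDBL_IBM128  = 0b0100,
      Val_GNU_Power_ABI_LDBL_64      = 0b1000,
      Val_GNU_Power_ABI_LDBL_IEEE128 = 0b1100,
    };

    int main() {
      // "doubledouble": hard double-precision float + IBM 128-bit long double.
      assert((Val_GNU_Power_ABI_HardFloat_DP | Val_GNU_Power_ABI_LDBL_IBM128) == 5);
      // "ieeequad": hard double-precision float + IEEE 128-bit long double.
      assert((Val_GNU_Power_ABI_HardFloat_DP | Val_GNU_Power_ABI_LDBL_IEEE128) == 13);
      // "ieeedouble": hard double-precision float + 64-bit long double.
      assert((Val_GNU_Power_ABI_HardFloat_DP | Val_GNU_Power_ABI_LDBL_64) == 9);
      return 0;
    }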
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 856569bc8a73..e7cd107c5046 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -150,7 +150,7 @@ class PPCFastISel final : public FastISel {
unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
unsigned SrcReg, unsigned Flag = 0,
unsigned SubReg = 0) {
- unsigned TmpReg = createResultReg(ToRC);
+ Register TmpReg = createResultReg(ToRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
return TmpReg;
@@ -428,7 +428,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
- unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
Addr.Base.Reg = ResultReg;
@@ -604,7 +604,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
// Look at the currently assigned register for this instruction
// to determine the required register class. This is necessary
// to constrain RA from using R0/X0 when this is not legal.
- unsigned AssignedReg = FuncInfo.ValueMap[I];
+ Register AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
@@ -783,7 +783,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
PPCPred = PPC::InvertPredicate(PPCPred);
}
- unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+ Register CondReg = createResultReg(&PPC::CRRCRegClass);
if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
CondReg, PPCPred))
@@ -847,7 +847,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
}
- unsigned SrcReg1 = getRegForValue(SrcValue1);
+ Register SrcReg1 = getRegForValue(SrcValue1);
if (SrcReg1 == 0)
return false;
@@ -928,13 +928,13 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
if (NeedsExt) {
- unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ Register ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg1 = ExtReg;
if (!UseImm) {
- unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ Register ExtReg = createResultReg(&PPC::GPRCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
return false;
SrcReg2 = ExtReg;
@@ -960,7 +960,7 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) {
if (SrcVT != MVT::f32 || DestVT != MVT::f64)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
@@ -978,7 +978,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
if (SrcVT != MVT::f64 || DestVT != MVT::f32)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
@@ -1019,7 +1019,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
// If necessary, extend 32-bit int to 64-bit.
if (SrcVT == MVT::i32) {
- unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ Register TmpReg = createResultReg(&PPC::G8RCRegClass);
if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
return 0;
SrcReg = TmpReg;
@@ -1079,7 +1079,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
SrcVT != MVT::i32 && SrcVT != MVT::i64)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (SrcReg == 0)
return false;
@@ -1091,7 +1091,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
else
Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
- unsigned DestReg = createResultReg(&PPC::SPERCRegClass);
+ Register DestReg = createResultReg(&PPC::SPERCRegClass);
// Generate the convert.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addReg(SrcReg);
@@ -1114,7 +1114,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// Extend the input if necessary.
if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
- unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ Register TmpReg = createResultReg(&PPC::G8RCRegClass);
if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
return false;
SrcVT = MVT::i64;
@@ -1128,7 +1128,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// Determine the opcode for the conversion.
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
unsigned Opc;
if (DstVT == MVT::f32)
@@ -1170,7 +1170,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
// Look at the currently assigned register for this instruction
// to determine the required register class.
- unsigned AssignedReg = FuncInfo.ValueMap[I];
+ Register AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
@@ -1206,7 +1206,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (SrcReg == 0)
return false;
@@ -1276,7 +1276,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
// Look at the currently assigned register for this instruction
// to determine the required register class. If there is no register,
// make a conservative choice (don't assign R0).
- unsigned AssignedReg = FuncInfo.ValueMap[I];
+ Register AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
(AssignedReg ? MRI.getRegClass(AssignedReg) :
&PPC::GPRC_and_GPRC_NOR0RegClass);
@@ -1296,8 +1296,8 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
break;
}
- unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
- unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
+ Register SrcReg1 = getRegForValue(I->getOperand(0));
if (SrcReg1 == 0) return false;
// Handle case of small immediate operand.
@@ -1355,7 +1355,7 @@ bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
// Reg-reg case.
- unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ Register SrcReg2 = getRegForValue(I->getOperand(1));
if (SrcReg2 == 0) return false;
// Reverse operands for subtract-from.
@@ -1441,7 +1441,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
- unsigned TmpReg = createResultReg(RC);
+ Register TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
llvm_unreachable("Failed to emit a sext!");
ArgVT = DestVT;
@@ -1453,7 +1453,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
MVT DestVT = VA.getLocVT();
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
- unsigned TmpReg = createResultReg(RC);
+ Register TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
llvm_unreachable("Failed to emit a zext!");
ArgVT = DestVT;
@@ -1628,7 +1628,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (ArgVT.isVector() || ArgVT == MVT::f128)
return false;
- unsigned Arg = getRegForValue(ArgValue);
+ Register Arg = getRegForValue(ArgValue);
if (Arg == 0)
return false;
@@ -1734,7 +1734,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
RetRegs.push_back(RetReg);
} else {
- unsigned Reg = getRegForValue(RV);
+ Register Reg = getRegForValue(RV);
if (Reg == 0)
return false;
@@ -1767,7 +1767,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
case CCValAssign::ZExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
- unsigned TmpReg = createResultReg(RC);
+ Register TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
return false;
SrcReg = TmpReg;
@@ -1776,7 +1776,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
case CCValAssign::SExt: {
const TargetRegisterClass *RC =
(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
- unsigned TmpReg = createResultReg(RC);
+ Register TmpReg = createResultReg(RC);
if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
return false;
SrcReg = TmpReg;
@@ -1857,7 +1857,7 @@ bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
- unsigned AddrReg = getRegForValue(I->getOperand(0));
+ Register AddrReg = getRegForValue(I->getOperand(0));
if (AddrReg == 0)
return false;
@@ -1884,7 +1884,7 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return false;
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (!SrcReg)
return false;
@@ -1903,7 +1903,7 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
Type *SrcTy = Src->getType();
bool IsZExt = isa<ZExtInst>(I);
- unsigned SrcReg = getRegForValue(Src);
+ Register SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
EVT SrcEVT, DestEVT;
@@ -1921,12 +1921,12 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) {
// instruction, use it. Otherwise pick the register class of the
// correct size that does not contain X0/R0, since we don't know
// whether downstream uses permit that assignment.
- unsigned AssignedReg = FuncInfo.ValueMap[I];
+ Register AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
(AssignedReg ? MRI.getRegClass(AssignedReg) :
(DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
&PPC::GPRC_and_GPRC_NOR0RegClass));
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
return false;
@@ -1966,15 +1966,6 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
return SelectBinaryIntOp(I, ISD::OR);
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
- case Instruction::Call:
- // On AIX, call lowering uses the DAG-ISEL path currently so that the
- // callee of the direct function call instruction will be mapped to the
- // symbol for the function's entry point, which is distinct from the
- // function descriptor symbol. The latter is the symbol whose XCOFF symbol
- // name is the C-linkage name of the source level function.
- if (TM.getTargetTriple().isOSAIX())
- break;
- return selectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
@@ -2012,7 +2003,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
else
RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
CodeModel::Model CModel = TM.getCodeModel();
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
@@ -2026,7 +2017,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
else
Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
- unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
@@ -2043,7 +2034,7 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
// But for large code model, we must generate a LDtocL followed
// by the LF[SD].
if (CModel == CodeModel::Large) {
- unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
@@ -2068,7 +2059,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
assert(VT == MVT::i64 && "Non-address!");
const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
- unsigned DestReg = createResultReg(RC);
+ Register DestReg = createResultReg(RC);
// Global values may be plain old object addresses, TLS object
// addresses, constant pool entries, or jump tables. How we generate
@@ -2083,6 +2074,12 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
if (GV->isThreadLocal())
return 0;
+ // If the global has the toc-data attribute, then fall back to DAG-ISEL.
+ if (TM.getTargetTriple().isOSAIX())
+ if (const GlobalVariable *Var = dyn_cast_or_null<GlobalVariable>(GV))
+ if (Var->hasAttribute("toc-data"))
+ return false;
+
PPCFuncInfo->setUsesTOCBasePtr();
// For small code model, generate a simple TOC load.
if (CModel == CodeModel::Small)
@@ -2099,7 +2096,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// Otherwise we generate:
// ADDItocL(ADDIStocHA8(%x2, GV), GV)
// Either way, start with the ADDIStocHA8:
- unsigned HighPartReg = createResultReg(RC);
+ Register HighPartReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
@@ -2123,7 +2120,7 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
unsigned Lo = Imm & 0xFFFF;
unsigned Hi = (Imm >> 16) & 0xFFFF;
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
if (isInt<16>(Imm))
@@ -2132,7 +2129,7 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
.addImm(Imm);
else if (Lo) {
// Both Lo and Hi have nonzero bits.
- unsigned TmpReg = createResultReg(RC);
+ Register TmpReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
.addImm(Hi);
@@ -2195,7 +2192,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
TmpReg3 = TmpReg2;
if ((Lo = Remainder & 0xFFFF)) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
ResultReg).addReg(TmpReg3).addImm(Lo);
return ResultReg;
@@ -2211,7 +2208,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && Subtarget->useCRBits()) {
- unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
return ImmReg;
@@ -2231,7 +2228,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
// a range of 0..0x7fff.
if (isInt<16>(Imm)) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
- unsigned ImmReg = createResultReg(RC);
+ Register ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
.addImm(Imm);
return ImmReg;
@@ -2283,7 +2280,7 @@ unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
ResultReg).addFrameIndex(SI->second).addImm(0);
return ResultReg;
@@ -2393,7 +2390,7 @@ unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && Subtarget->useCRBits()) {
- unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
return ImmReg;
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 3ca563fee970..65c969c196e1 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -674,7 +674,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
: PPC::MFCR);
const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
const MCInstrDesc &HashST =
- TII.get(HasPrivileged ? PPC::HASHSTP : PPC::HASHST);
+ TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
+ : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
// Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
// LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
@@ -1172,7 +1173,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// CFA.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
// This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
@@ -1195,7 +1196,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
// the whole CR word. In the ELFv2 ABI, every CR that was
// actually saved gets its own CFI record.
- unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
+ Register CRReg = isELFv2ABI? Reg : PPC::CR2;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1590,7 +1591,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
: PPC::MTOCRF);
const MCInstrDesc &HashChk =
- TII.get(HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK);
+ TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
+ : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
int64_t LROffset = getReturnSaveOffset();
int64_t FPOffset = 0;
@@ -2085,7 +2087,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
SmallVector<CalleeSavedInfo, 18> VRegs;
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
(Reg != PPC::X2 && Reg != PPC::R2)) &&
"Not expecting to try to spill R2 in a function that must save TOC");
@@ -2337,7 +2339,7 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
if (BVAllocatable.none())
return false;
- unsigned Reg = CS.getReg();
+ Register Reg = CS.getReg();
if (!PPC::G8RCRegClass.contains(Reg)) {
AllSpilledToReg = false;
@@ -2395,7 +2397,7 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
});
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
@@ -2581,7 +2583,7 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
--BeforeI;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ Register Reg = CSI[i].getReg();
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index ba74af5ef5f7..fdcf6e7e80f2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1365,8 +1365,7 @@ class BitPermutationSelector {
ValueBit(SDValue V, unsigned I, Kind K = Variable)
: V(V), Idx(I), K(K) {}
- ValueBit(Kind K = Variable)
- : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
+ ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
bool isZero() const {
return K == ConstZero || K == VariableKnownToBeZero;
@@ -4438,7 +4437,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
// Force the ccreg into CR7.
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
- SDValue InFlag(nullptr, 0); // Null incoming flag value.
+ SDValue InFlag; // Null incoming flag value.
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8d6edf07bc53..25cc34badda0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2433,7 +2433,7 @@ unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
- SDValue OpVal(nullptr, 0);
+ SDValue OpVal;
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
@@ -3508,8 +3508,9 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
int ShuffV[] = {1, 0, 3, 2};
SDValue Shuff =
DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
- return DAG.getBitcast(
- MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32));
+ return DAG.getBitcast(MVT::v2i64,
+ DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
+ dl, MVT::v4i32, Shuff, SetCC32));
}
// We handle most of these in the usual way.
@@ -4078,8 +4079,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// virtual ones.
if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
assert(i + 1 < e && "No second half of double precision argument");
- unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
- unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
+ Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
+ Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
if (!Subtarget.isLittleEndian())
@@ -4087,7 +4088,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
ArgValueHi);
} else {
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
ValVT == MVT::i1 ? MVT::i32 : ValVT);
if (ValVT == MVT::i1)
@@ -4179,7 +4180,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// dereferencing the result of va_next.
for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
// Get an existing live-in vreg, or add a new one.
- unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
if (!VReg)
VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
@@ -4198,7 +4199,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// on the stack.
for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
// Get an existing live-in vreg, or add a new one.
- unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
if (!VReg)
VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
@@ -4384,7 +4385,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(Arg);
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
+ Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
@@ -4408,7 +4409,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
if (GPR_idx == Num_GPR_Regs)
break;
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Addr = FIN;
@@ -4432,7 +4433,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::i64:
if (Flags.isNest()) {
// The 'nest' parameter, if any, is passed in R11.
- unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
+ Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
@@ -4445,7 +4446,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
+ Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
@@ -4491,7 +4492,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// This can only ever happen in the presence of f32 array types,
// since otherwise we never run out of FPRs before running out
// of GPRs.
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
+ Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
@@ -4532,7 +4533,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
@@ -4591,7 +4592,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// the result of va_next.
for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
GPR_idx < Num_GPR_Regs; ++GPR_idx) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
@@ -7059,7 +7060,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
unsigned Offset) {
- const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
+ const Register VReg = MF.addLiveIn(PhysReg, RegClass);
// Since the callers side has left justified the aggregate in the
// register, we can simply store the entire register into the stack
// slot.
@@ -7156,7 +7157,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
(CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
GPRIndex < NumGPArgRegs; ++GPRIndex) {
- const unsigned VReg =
+ const Register VReg =
IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
: MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
@@ -11178,13 +11179,17 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
+ case ISD::FP_TO_UINT: {
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
MVT::ppcf128)
return;
- Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
+ SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
+ Results.push_back(LoweredValue);
+ if (N->isStrictFPOpcode())
+ Results.push_back(LoweredValue.getValue(1));
return;
+ }
case ISD::TRUNCATE: {
if (!N->getValueType(0).isVector())
return;
@@ -17890,7 +17895,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
"Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Type *ValTy = cast<PointerType>(AlignedAddr->getType())->getElementType();
+ Type *ValTy = AlignedAddr->getType()->getPointerElementType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
Function *RMW = Intrinsic::getDeclaration(
M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
@@ -17915,7 +17920,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
"Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Type *ValTy = cast<PointerType>(AlignedAddr->getType())->getElementType();
+ Type *ValTy = AlignedAddr->getType()->getPointerElementType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
Function *IntCmpXchg =
Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
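A small worked example, not from the patch, of why the lane combine in LowerSETCC now depends on the condition code: when a v2i64 compare is done as two 32-bit lane compares plus a word swap, SETEQ needs both halves to match (AND of the lane masks), while SETNE is satisfied by a mismatch in either half (OR of the lane masks).

    // Scalar model of the v2i64-via-v4i32 compare combine.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t A = 0x00000001FFFFFFFFull, B = 0x00000002FFFFFFFFull;
      bool LoEq = uint32_t(A) == uint32_t(B);              // low 32-bit halves
      bool HiEq = uint32_t(A >> 32) == uint32_t(B >> 32);  // high 32-bit halves
      assert((A == B) == (LoEq && HiEq));    // SETEQ: AND the per-lane results
      assert((A != B) == (!LoEq || !HiEq));  // SETNE: OR the per-lane results
      return 0;
    }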
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 87b7f96112ec..eb52e4aa6273 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1456,4 +1456,4 @@ namespace llvm {
} // end namespace llvm
-#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 58af8037f59c..eae8e36e475e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1760,26 +1760,27 @@ defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
// These instructions store a hash computed from the value of the link register
// and the value of the stack pointer.
-let mayStore = 1 in {
-def HASHST : XForm_XD6_RA5_RB5<31, 722, (outs),
- (ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>;
-def HASHSTP : XForm_XD6_RA5_RB5<31, 658, (outs),
+let mayStore = 1, Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+def HASHST8 : XForm_XD6_RA5_RB5<31, 722, (outs),
(ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>;
+def HASHSTP8 : XForm_XD6_RA5_RB5<31, 658, (outs),
+ (ins g8rc:$RB, memrihash:$D_RA_XD),
+ "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>;
}
// These instructions check a hash computed from the value of the link register
// and the value of the stack pointer. The hasSideEffects flag is needed as the
// instruction may TRAP if the hash does not match the hash stored at the
// specified address.
-let mayLoad = 1, hasSideEffects = 1 in {
-def HASHCHK : XForm_XD6_RA5_RB5<31, 754, (outs),
- (ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>;
-def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
+let mayLoad = 1, hasSideEffects = 1,
+ Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+def HASHCHK8 : XForm_XD6_RA5_RB5<31, 754, (outs),
(ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>;
+def HASHCHKP8 : XForm_XD6_RA5_RB5<31, 690, (outs),
+ (ins g8rc:$RB, memrihash:$D_RA_XD),
+ "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>;
}
let Interpretation64Bit = 1, isCodeGenOnly = 1, hasSideEffects = 1 in
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index a0fd2111de11..eada872c2a7d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2339,9 +2339,8 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
Found = true;
}
} else if (MO.isRegMask()) {
- for (TargetRegisterClass::iterator I = RC->begin(),
- IE = RC->end(); I != IE; ++I)
- if (MO.clobbersPhysReg(*I)) {
+ for (MCPhysReg R : *RC)
+ if (MO.clobbersPhysReg(R)) {
Pred.push_back(MO);
Found = true;
}
@@ -3253,7 +3252,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
Register Reg = MI.getOperand(i).getReg();
if (!Register::isVirtualRegister(Reg))
continue;
- unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
+ Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
if (Register::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8 ||
@@ -3502,8 +3501,8 @@ bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
return false;
assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
- unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg();
- unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
+ Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
+ Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
MachineBasicBlock::iterator End) {
for (auto It = ++Start; It != End; It++)
@@ -3720,7 +3719,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
MachineInstr **ToErase) const {
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
- unsigned FoldingReg = MI.getOperand(1).getReg();
+ Register FoldingReg = MI.getOperand(1).getReg();
if (!Register::isVirtualRegister(FoldingReg))
return false;
MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
@@ -5266,7 +5265,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
- if (SrcMI != NULL)
+ if (SrcMI != nullptr)
return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
return false;
@@ -5290,7 +5289,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
- if (SrcMI != NULL)
+ if (SrcMI != nullptr)
return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
return false;
@@ -5319,7 +5318,8 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
if (!Register::isVirtualRegister(SrcReg))
return false;
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
- if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1))
+ if (SrcMI == nullptr ||
+ !isSignOrZeroExtended(*SrcMI, SignExt, Depth + 1))
return false;
}
else
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 2340be5b5915..c26b4f6ceb7d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5530,6 +5530,30 @@ def DWBytes3210 {
(i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), Word, sub_32));
}
+// These instructions store a hash computed from the value of the link register
+// and the value of the stack pointer.
+let mayStore = 1 in {
+def HASHST : XForm_XD6_RA5_RB5<31, 722, (outs),
+ (ins gprc:$RB, memrihash:$D_RA_XD),
+ "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>;
+def HASHSTP : XForm_XD6_RA5_RB5<31, 658, (outs),
+ (ins gprc:$RB, memrihash:$D_RA_XD),
+ "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+}
+
+// These instructions check a hash computed from the value of the link register
+// and the value of the stack pointer. The hasSideEffects flag is needed as the
+// instruction may TRAP if the hash does not match the hash stored at the
+// specified address.
+let mayLoad = 1, hasSideEffects = 1 in {
+def HASHCHK : XForm_XD6_RA5_RB5<31, 754, (outs),
+ (ins gprc:$RB, memrihash:$D_RA_XD),
+ "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>;
+def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
+ (ins gprc:$RB, memrihash:$D_RA_XD),
+ "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+}
+
// Now both high word and low word are reversed, next
// swap the high word and low word.
def : Pat<(i64 (bitreverse i64:$A)),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index a19289e96b3e..fe354208533b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2398,6 +2398,8 @@ let Predicates = [IsISA3_1] in {
let Predicates = [IsISA3_1, HasVSX] in {
def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
+ def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp", []>;
+ def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp", []>;
}
// Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index d12a9b806fd0..e5fa02bc8ccf 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -107,10 +107,10 @@ private:
void initialize(MachineFunction &MFParm);
// Perform peepholes.
- bool simplifyCode(void);
+ bool simplifyCode();
// Perform peepholes.
- bool eliminateRedundantCompare(void);
+ bool eliminateRedundantCompare();
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
@@ -258,12 +258,12 @@ void PPCMIPeephole::UpdateTOCSaves(
}
bool Keep = true;
- for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
- MachineInstr *CurrInst = It->first;
+ for (auto &I : TOCSaves) {
+ MachineInstr *CurrInst = I.first;
// If new instruction dominates an existing one, mark existing one as
// redundant.
- if (It->second && MDT->dominates(MI, CurrInst))
- It->second = false;
+ if (I.second && MDT->dominates(MI, CurrInst))
+ I.second = false;
// Check if the new instruction is redundant.
if (MDT->dominates(CurrInst, MI)) {
Keep = false;
@@ -381,7 +381,7 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
}
// Perform peephole optimizations.
-bool PPCMIPeephole::simplifyCode(void) {
+bool PPCMIPeephole::simplifyCode() {
bool Simplified = false;
bool TrapOpt = false;
MachineInstr* ToErase = nullptr;
@@ -481,7 +481,7 @@ bool PPCMIPeephole::simplifyCode(void) {
// PPC::ZERO.
if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
break;
- unsigned MIDestReg = MI.getOperand(0).getReg();
+ Register MIDestReg = MI.getOperand(0).getReg();
for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
if (MRI->use_nodbg_empty(MIDestReg)) {
@@ -519,9 +519,9 @@ bool PPCMIPeephole::simplifyCode(void) {
// XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
// We have to look through chains of COPY and SUBREG_TO_REG
// to find the real source values for comparison.
- unsigned TrueReg1 =
+ Register TrueReg1 =
TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
- unsigned TrueReg2 =
+ Register TrueReg2 =
TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)))
@@ -541,7 +541,7 @@ bool PPCMIPeephole::simplifyCode(void) {
auto isConversionOfLoadAndSplat = [=]() -> bool {
if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
return false;
- unsigned FeedReg1 =
+ Register FeedReg1 =
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
if (Register::isVirtualRegister(FeedReg1)) {
MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
@@ -565,16 +565,16 @@ bool PPCMIPeephole::simplifyCode(void) {
// If this is a splat or a swap fed by another splat, we
// can replace it with a copy.
if (DefOpc == PPC::XXPERMDI) {
- unsigned DefReg1 = DefMI->getOperand(1).getReg();
- unsigned DefReg2 = DefMI->getOperand(2).getReg();
+ Register DefReg1 = DefMI->getOperand(1).getReg();
+ Register DefReg2 = DefMI->getOperand(2).getReg();
unsigned DefImmed = DefMI->getOperand(3).getImm();
// If the two inputs are not the same register, check to see if
// they originate from the same virtual register after only
// copy-like instructions.
if (DefReg1 != DefReg2) {
- unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
- unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
+ Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
+ Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
if (!(FeedReg1 == FeedReg2 &&
Register::isVirtualRegister(FeedReg1)))
@@ -643,7 +643,7 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::XXSPLTW: {
unsigned MyOpcode = MI.getOpcode();
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
- unsigned TrueReg =
+ Register TrueReg =
TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
if (!Register::isVirtualRegister(TrueReg))
break;
@@ -707,7 +707,7 @@ bool PPCMIPeephole::simplifyCode(void) {
}
case PPC::XVCVDPSP: {
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
- unsigned TrueReg =
+ Register TrueReg =
TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
if (!Register::isVirtualRegister(TrueReg))
break;
@@ -716,9 +716,9 @@ bool PPCMIPeephole::simplifyCode(void) {
// This can occur when building a vector of single precision or integer
// values.
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
- unsigned DefsReg1 =
+ Register DefsReg1 =
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- unsigned DefsReg2 =
+ Register DefsReg2 =
TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if (!Register::isVirtualRegister(DefsReg1) ||
!Register::isVirtualRegister(DefsReg2))
@@ -1178,7 +1178,7 @@ static unsigned getIncomingRegForBlock(MachineInstr *Phi,
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
unsigned SrcReg = Reg;
- while (1) {
+ while (true) {
unsigned NextReg = SrcReg;
MachineInstr *Inst = MRI->getVRegDef(SrcReg);
if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
@@ -1334,7 +1334,7 @@ bool PPCMIPeephole::eliminateRedundantTOCSaves(
// cmpwi r3, 0 ; greater than -1 means greater or equal to 0
// bge 0, .LBB0_4
-bool PPCMIPeephole::eliminateRedundantCompare(void) {
+bool PPCMIPeephole::eliminateRedundantCompare() {
bool Simplified = false;
for (MachineBasicBlock &MBB2 : *MF) {
@@ -1737,4 +1737,3 @@ INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
char PPCMIPeephole::ID = 0;
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
-
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4bccc5596d2b..76b016c0ee79 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -390,6 +390,18 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool PPCRegisterInfo::isAsmClobberable(const MachineFunction &MF,
+ MCRegister PhysReg) const {
+ // We cannot use getReservedRegs() to find the registers that are not asm
+ // clobberable because there are some reserved registers which can be
+ // clobbered by inline asm. For example, when LR is clobbered, the register is
+ // saved and restored. We will hardcode the registers that are not asm
+ // clobberable in this function.
+
+ // The stack pointer (R1/X1) is not clobberable by inline asm
+ return PhysReg != PPC::R1 && PhysReg != PPC::X1;
+}
+
bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCInstrInfo *InstrInfo = Subtarget.getInstrInfo();
@@ -423,7 +435,7 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
continue;
int FrIdx = Info[i].getFrameIdx();
- unsigned Reg = Info[i].getReg();
+ Register Reg = Info[i].getReg();
const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(RC);
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 2e534dd1bcd5..114f6d0f4c66 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -91,6 +91,8 @@ public:
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isAsmClobberable(const MachineFunction &MF,
+ MCRegister PhysReg) const override;
bool isCallerPreservedPhysReg(MCRegister PhysReg,
const MachineFunction &MF) const override;
@@ -185,6 +187,10 @@ public:
return RegName;
}
+
+ bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const override {
+ return Reg == PPC::LR || Reg == PPC::LR8;
+ }
};
} // end namespace llvm
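A hypothetical usage example, assuming the usual GCC-style register names, of the distinction the new hooks draw: the link register may appear in an inline-asm clobber list because it is saved and restored around the statement, while the stack pointer (R1/X1) is never treated as asm-clobberable.

    // LR in the clobber list is acceptable under the new isAsmClobberable();
    // naming "r1" here would not be, since the stack pointer must stay intact.
    static inline void clobbersLinkRegister(void) {
      __asm__ volatile("" ::: "lr");
    }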
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ed28731b8ef2..cc5738a5d7b6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -374,11 +374,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
// clobbers ctr.
auto asmClobbersCTR = [](InlineAsm *IA) {
InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
- for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
- InlineAsm::ConstraintInfo &C = CIV[i];
+ for (const InlineAsm::ConstraintInfo &C : CIV) {
if (C.Type != InlineAsm::isInput)
- for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
- if (StringRef(C.Codes[j]).equals_insensitive("{ctr}"))
+ for (const auto &Code : C.Codes)
+ if (StringRef(Code).equals_insensitive("{ctr}"))
return true;
}
return false;
@@ -653,11 +652,17 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
}
return true;
- } else if (isa<BinaryOperator>(J) &&
- (J->getType()->getScalarType()->isFP128Ty() ||
+ } else if ((J->getType()->getScalarType()->isFP128Ty() ||
J->getType()->getScalarType()->isPPC_FP128Ty())) {
// Most operations on f128 or ppc_f128 values become calls.
return true;
+ } else if (isa<FCmpInst>(J) &&
+ J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) {
+ return true;
+ } else if ((isa<FPTruncInst>(J) || isa<FPExtInst>(J)) &&
+ (cast<CastInst>(J)->getSrcTy()->getScalarType()->isFP128Ty() ||
+ cast<CastInst>(J)->getDestTy()->getScalarType()->isFP128Ty())) {
+ return true;
} else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
CastInst *CI = cast<CastInst>(J);
@@ -1295,8 +1300,8 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
// Process nested loops first.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
+ for (Loop *I : *L)
+ if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
return false; // Stop search.
HardwareLoopInfo HWLoopInfo(L);
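An illustrative source-level sketch, not taken from the patch, of what the added fp128 checks in mightUseCTR() guard against: an fcmp, fptrunc, or fpext on a 128-bit float inside a loop body is normally expanded to a runtime library call, and such a call prevents forming a CTR-based hardware loop.

    // Hypothetical kernel; the fp128 compare typically lowers to a helper call.
    int countBelow(const __float128 *V, int N, __float128 Limit) {
      int C = 0;
      for (int I = 0; I < N; ++I)
        if (V[I] < Limit)   // fcmp on fp128 -> libcall -> no hardware loop
          ++C;
      return C;
    }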
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 0be35adc35c7..8a7d324ddfe1 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -297,18 +297,16 @@ protected:
// fma result.
LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
- for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
- AI != AE; ++AI) {
+ for (auto &AI : FMAInt) {
// Don't add the segment that corresponds to the original copy.
- if (AI->valno == AddendValNo)
+ if (AI.valno == AddendValNo)
continue;
VNInfo *NewFMAValNo =
- NewFMAInt.getNextValue(AI->start,
- LIS->getVNInfoAllocator());
+ NewFMAInt.getNextValue(AI.start, LIS->getVNInfoAllocator());
- NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
- NewFMAValNo));
+ NewFMAInt.addSegment(
+ LiveInterval::Segment(AI.start, AI.end, NewFMAValNo));
}
LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 75592dd4c6f5..a2ea34fe11c7 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -302,7 +302,7 @@ struct RISCVOperand : public MCParsedAsmOperand {
struct VTypeOp VType;
};
- RISCVOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ RISCVOperand(KindTy K) : Kind(K) {}
public:
RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() {
@@ -337,7 +337,6 @@ public:
bool isImm() const override { return Kind == KindTy::Immediate; }
bool isMem() const override { return false; }
bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
- bool isVType() const { return Kind == KindTy::VType; }
bool isGPR() const {
return Kind == KindTy::Register &&
@@ -421,7 +420,27 @@ public:
bool isCSRSystemRegister() const { return isSystemRegister(); }
- bool isVTypeI() const { return isVType(); }
+ bool isVTypeImm(unsigned N) const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUIntN(N, Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
+ // If the last operand of the vsetvli/vsetivli instruction is a constant
+ // expression, KindTy is Immediate.
+ bool isVTypeI10() const {
+ if (Kind == KindTy::Immediate)
+ return isVTypeImm(10);
+ return Kind == KindTy::VType;
+ }
+ bool isVTypeI11() const {
+ if (Kind == KindTy::Immediate)
+ return isVTypeImm(11);
+ return Kind == KindTy::VType;
+ }
/// Return true if the operand is a valid for the fence instruction e.g.
/// ('iorw').
@@ -547,6 +566,16 @@ public:
return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isRnumArg() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && Imm >= INT64_C(0) && Imm <= INT64_C(10) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isSImm5() const {
if (!isImm())
return false;
@@ -898,9 +927,21 @@ public:
Inst.addOperand(MCOperand::createImm(SysReg.Encoding));
}
+ // Support non-canonical syntax:
+ // "vsetivli rd, uimm, 0xabc" or "vsetvli rd, rs1, 0xabc"
+ // "vsetivli rd, uimm, (0xc << N)" or "vsetvli rd, rs1, (0xc << N)"
void addVTypeIOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createImm(getVType()));
+ int64_t Imm = 0;
+ if (Kind == KindTy::Immediate) {
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ (void)IsConstantImm;
+ assert(IsConstantImm && "Invalid VTypeI Operand!");
+ } else {
+ Imm = getVType();
+ }
+ Inst.addOperand(MCOperand::createImm(Imm));
}
// Returns the rounding mode represented by this RISCVOperand. Should only
@@ -1209,6 +1250,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
(1 << 4),
"immediate must be in the range");
}
+ case Match_InvalidRnumArg: {
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10);
+ }
}
llvm_unreachable("Unknown match type detected!");
@@ -1881,8 +1925,10 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
Operands.push_back(RISCVOperand::createToken(Name, NameLoc, isRV64()));
// If there are no more operands, then finish
- if (getLexer().is(AsmToken::EndOfStatement))
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ getParser().Lex(); // Consume the EndOfStatement.
return false;
+ }
// Parse first operand
if (parseOperand(Operands, Name))
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 9cfd36745f46..01c6bd90ea58 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -191,7 +191,8 @@ enum OperandType : unsigned {
OPERAND_SIMM12,
OPERAND_UIMM20,
OPERAND_UIMMLOG2XLEN,
- OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN,
+ OPERAND_RVKRNUM,
+ OPERAND_LAST_RISCV_IMM = OPERAND_RVKRNUM,
// Operand is either a register or uimm5, this is used by V extension pseudo
// instructions to represent a value that can be passed as AVL to either vsetvli
// or vsetivli.
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 59d8bb009d1c..7ce7dafb8ca1 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_RISCV_RISCVELFSTREAMER_H
-#define LLVM_LIB_TARGET_RISCV_RISCVELFSTREAMER_H
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVELFSTREAMER_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVELFSTREAMER_H
#include "RISCVTargetStreamer.h"
#include "llvm/MC/MCELFStreamer.h"
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 89a7d54f60f8..3268740849f0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -85,7 +85,7 @@ void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O,
const char *Modifier) {
- assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
const MCOperand &MO = MI->getOperand(OpNo);
if (MO.isReg()) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 0ee6d8de78c9..18858209aa9b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -141,6 +141,24 @@ static void generateInstSeqImpl(int64_t Val,
Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}
+static unsigned extractRotateInfo(int64_t Val) {
+ // for case: 0b111..1..xxxxxx1..1..
+ unsigned LeadingOnes = countLeadingOnes((uint64_t)Val);
+ unsigned TrailingOnes = countTrailingOnes((uint64_t)Val);
+ if (TrailingOnes > 0 && TrailingOnes < 64 &&
+ (LeadingOnes + TrailingOnes) > (64 - 12))
+ return 64 - TrailingOnes;
+
+ // for case: 0bxxx1..1..1...xxx
+ unsigned UpperTrailingOnes = countTrailingOnes(Hi_32(Val));
+ unsigned LowerLeadingOnes = countLeadingOnes(Lo_32(Val));
+ if (UpperTrailingOnes < 32 &&
+ (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
+ return 32 - UpperTrailingOnes;
+
+ return 0;
+}
+
namespace llvm {
namespace RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
@@ -312,6 +330,18 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
}
+ // Perform optimization with rori in the Zbb extension.
+ if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
+ if (unsigned Rotate = extractRotateInfo(Val)) {
+ RISCVMatInt::InstSeq TmpSeq;
+ uint64_t NegImm12 =
+ ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
+ assert(isInt<12>(NegImm12));
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, NegImm12));
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::RORI, Rotate));
+ Res = TmpSeq;
+ }
+ }
return Res;
}
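
As an aside on the rori materialization added above: a minimal standalone sketch (not part of the patch; rotr64, the sample value, and the variable names are illustrative, and __builtin_ctzll is assumed to be the GCC/Clang builtin) showing how rotating the trailing ones up against the leading ones leaves a value that fits a signed 12-bit ADDI immediate, which RORI then rotates back:

#include <cstdint>
#include <cstdio>

// Rotate right; N is expected to be in [1, 63] here.
static uint64_t rotr64(uint64_t V, unsigned N) {
  return (V >> N) | (V << (64 - N));
}

int main() {
  uint64_t Val = 0xFFFFFFFFFFFFF0FFULL;             // 52 leading + 8 trailing ones
  unsigned TrailingOnes = __builtin_ctzll(~Val);    // 8
  unsigned Rotate = 64 - TrailingOnes;              // 56, the RORI amount
  int64_t Imm = (int64_t)rotr64(Val, TrailingOnes); // -16, fits in simm12
  // Two-instruction sequence: addi rd, x0, -16 ; rori rd, rd, 56
  uint64_t Rebuilt = rotr64((uint64_t)Imm, Rotate);
  printf("imm=%lld rebuilt=%016llx\n", (long long)Imm,
         (unsigned long long)Rebuilt);              // prints fffffffffffff0ff
  return 0;
}
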
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index 02b4b18f54bd..6a8e0c640001 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
-#define LLVM_LIB_TARGET_RISCV_MATINT_H
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_MATINT_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_MATINT_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -15,7 +15,6 @@
namespace llvm {
class APInt;
-class MCSubtargetInfo;
namespace RISCVMatInt {
struct Inst {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 0bda3de0ce5d..171780d94ce7 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_RISCV_RISCVTARGETSTREAMER_H
-#define LLVM_LIB_TARGET_RISCV_RISCVTARGETSTREAMER_H
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVTARGETSTREAMER_H
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index b415c9f35e7f..03462240fd93 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -40,6 +40,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
+FunctionPass *createRISCVSExtWRemovalPass();
+void initializeRISCVSExtWRemovalPass(PassRegistry &);
+
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 6aa915c01929..5b0f27c5e937 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -42,7 +42,7 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">,
"'D' (Double-Precision Floating-Point)">;
def FeatureStdExtZfhmin
- : SubtargetFeature<"experimental-zfhmin", "HasStdExtZfhmin", "true",
+ : SubtargetFeature<"zfhmin", "HasStdExtZfhmin", "true",
"'Zfhmin' (Half-Precision Floating-Point Minimal)",
[FeatureStdExtF]>;
def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">,
@@ -50,7 +50,7 @@ def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">,
"'Zfhmin' (Half-Precision Floating-Point Minimal)">;
def FeatureStdExtZfh
- : SubtargetFeature<"experimental-zfh", "HasStdExtZfh", "true",
+ : SubtargetFeature<"zfh", "HasStdExtZfh", "true",
"'Zfh' (Half-Precision Floating-Point)",
[FeatureStdExtZfhmin, FeatureStdExtF]>;
def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
@@ -65,83 +65,217 @@ def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
"'C' (Compressed Instructions)">;
def FeatureStdExtZba
- : SubtargetFeature<"experimental-zba", "HasStdExtZba", "true",
- "'Zba' (Address calculation 'B' Instructions)">;
+ : SubtargetFeature<"zba", "HasStdExtZba", "true",
+ "'Zba' (Address Generation Instructions)">;
def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">,
AssemblerPredicate<(all_of FeatureStdExtZba),
- "'Zba' (Address calculation 'B' Instructions)">;
+ "'Zba' (Address Generation Instructions)">;
def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;
def FeatureStdExtZbb
- : SubtargetFeature<"experimental-zbb", "HasStdExtZbb", "true",
- "'Zbb' (Base 'B' Instructions)">;
+ : SubtargetFeature<"zbb", "HasStdExtZbb", "true",
+ "'Zbb' (Basic Bit-Manipulation)">;
def HasStdExtZbb : Predicate<"Subtarget->hasStdExtZbb()">,
AssemblerPredicate<(all_of FeatureStdExtZbb),
- "'Zbb' (Base 'B' Instructions)">;
+ "'Zbb' (Basic Bit-Manipulation)">;
def FeatureStdExtZbc
- : SubtargetFeature<"experimental-zbc", "HasStdExtZbc", "true",
- "'Zbc' (Carry-Less 'B' Instructions)">;
+ : SubtargetFeature<"zbc", "HasStdExtZbc", "true",
+ "'Zbc' (Carry-Less Multiplication)">;
def HasStdExtZbc : Predicate<"Subtarget->hasStdExtZbc()">,
AssemblerPredicate<(all_of FeatureStdExtZbc),
- "'Zbc' (Carry-Less 'B' Instructions)">;
+ "'Zbc' (Carry-Less Multiplication)">;
def FeatureStdExtZbe
: SubtargetFeature<"experimental-zbe", "HasStdExtZbe", "true",
- "'Zbe' (Extract-Deposit 'B' Instructions)">;
+ "'Zbe' (Extract-Deposit 'Zb' Instructions)">;
def HasStdExtZbe : Predicate<"Subtarget->hasStdExtZbe()">,
AssemblerPredicate<(all_of FeatureStdExtZbe),
- "'Zbe' (Extract-Deposit 'B' Instructions)">;
+ "'Zbe' (Extract-Deposit 'Zb' Instructions)">;
def FeatureStdExtZbf
: SubtargetFeature<"experimental-zbf", "HasStdExtZbf", "true",
- "'Zbf' (Bit-Field 'B' Instructions)">;
+ "'Zbf' (Bit-Field 'Zb' Instructions)">;
def HasStdExtZbf : Predicate<"Subtarget->hasStdExtZbf()">,
AssemblerPredicate<(all_of FeatureStdExtZbf),
- "'Zbf' (Bit-Field 'B' Instructions)">;
+ "'Zbf' (Bit-Field 'Zb' Instructions)">;
def FeatureStdExtZbm
: SubtargetFeature<"experimental-zbm", "HasStdExtZbm", "true",
- "'Zbm' (Matrix 'B' Instructions)">;
+ "'Zbm' (Matrix 'Zb' Instructions)">;
def HasStdExtZbm : Predicate<"Subtarget->hasStdExtZbm()">,
AssemblerPredicate<(all_of FeatureStdExtZbm),
- "'Zbm' (Matrix 'B' Instructions)">;
+ "'Zbm' (Matrix 'Zb' Instructions)">;
def FeatureStdExtZbp
: SubtargetFeature<"experimental-zbp", "HasStdExtZbp", "true",
- "'Zbp' (Permutation 'B' Instructions)">;
+ "'Zbp' (Permutation 'Zb' Instructions)">;
def HasStdExtZbp : Predicate<"Subtarget->hasStdExtZbp()">,
AssemblerPredicate<(all_of FeatureStdExtZbp),
- "'Zbp' (Permutation 'B' Instructions)">;
+ "'Zbp' (Permutation 'Zb' Instructions)">;
def FeatureStdExtZbr
: SubtargetFeature<"experimental-zbr", "HasStdExtZbr", "true",
- "'Zbr' (Polynomial Reduction 'B' Instructions)">;
+ "'Zbr' (Polynomial Reduction 'Zb' Instructions)">;
def HasStdExtZbr : Predicate<"Subtarget->hasStdExtZbr()">,
AssemblerPredicate<(all_of FeatureStdExtZbr),
- "'Zbr' (Polynomial Reduction 'B' Instructions)">;
+ "'Zbr' (Polynomial Reduction 'Zb' Instructions)">;
def FeatureStdExtZbs
- : SubtargetFeature<"experimental-zbs", "HasStdExtZbs", "true",
- "'Zbs' (Single-Bit 'B' Instructions)">;
+ : SubtargetFeature<"zbs", "HasStdExtZbs", "true",
+ "'Zbs' (Single-Bit Instructions)">;
def HasStdExtZbs : Predicate<"Subtarget->hasStdExtZbs()">,
AssemblerPredicate<(all_of FeatureStdExtZbs),
- "'Zbs' (Single-Bit 'B' Instructions)">;
+ "'Zbs' (Single-Bit Instructions)">;
def FeatureStdExtZbt
: SubtargetFeature<"experimental-zbt", "HasStdExtZbt", "true",
- "'Zbt' (Ternary 'B' Instructions)">;
+ "'Zbt' (Ternary 'Zb' Instructions)">;
def HasStdExtZbt : Predicate<"Subtarget->hasStdExtZbt()">,
AssemblerPredicate<(all_of FeatureStdExtZbt),
- "'Zbt' (Ternary 'B' Instructions)">;
+ "'Zbt' (Ternary 'Zb' Instructions)">;
// Some instructions belong to both the basic and the permutation
// subextensions. They should be enabled if either has been specified.
def HasStdExtZbbOrZbp
: Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()">,
AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp),
- "'Zbb' (Base 'B' Instructions) or "
- "'Zbp' (Permutation 'B' Instructions)">;
+ "'Zbb' (Basic Bit-Manipulation) or "
+ "'Zbp' (Permutation 'Zb' Instructions)">;
+
+def FeatureStdExtZbkb
+ : SubtargetFeature<"zbkb", "HasStdExtZbkb", "true",
+ "'Zbkb' (Bitmanip instructions for Cryptography)">;
+def HasStdExtZbkb : Predicate<"Subtarget->hasStdExtZbkb()">,
+ AssemblerPredicate<(all_of FeatureStdExtZbkb),
+ "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
+def FeatureStdExtZbkx
+ : SubtargetFeature<"zbkx", "HasStdExtZbkx", "true",
+ "'Zbkx' (Crossbar permutation instructions)">;
+def HasStdExtZbkx : Predicate<"Subtarget->hasStdExtZbkx()">,
+ AssemblerPredicate<(all_of FeatureStdExtZbkx),
+ "'Zbkx' (Crossbar permutation instructions)">;
+
+def HasStdExtZbpOrZbkx
+ : Predicate<"Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkx()">,
+ AssemblerPredicate<(any_of FeatureStdExtZbp, FeatureStdExtZbkx),
+ "'Zbp' (Permutation 'Zb' Instructions) or "
+ "'Zbkx' (Crossbar permutation instructions)">;
+
+def HasStdExtZbpOrZbkb
+ : Predicate<"Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
+ AssemblerPredicate<(any_of FeatureStdExtZbp, FeatureStdExtZbkb),
+ "'Zbp' (Permutation 'Zb' Instructions) or "
+ "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
+def HasStdExtZbbOrZbkb
+ : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()">,
+ AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbkb),
+ "'Zbb' (Basic Bit-Manipulation) or "
+ "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
+def HasStdExtZbbOrZbpOrZbkb
+ : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
+ AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp, FeatureStdExtZbkb),
+ "'Zbb' (Basic Bit-Manipulation) or "
+ "'Zbp' (Permutation 'Zb' Instructions) or "
+ "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
+// The Zbkc (carry-less multiply for cryptography) subextension is a subset of
+// the Zbc (carry-less multiplication) subextension; its instructions should be
+// enabled whenever Zbc is enabled.
+def FeatureStdExtZbkc
+ : SubtargetFeature<"zbkc", "HasStdExtZbkc", "true",
+ "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
+def HasStdExtZbkc
+ : Predicate<"Subtarget->hasStdExtZbkc()">,
+ AssemblerPredicate<(all_of FeatureStdExtZbkc),
+ "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
+
+def HasStdExtZbcOrZbkc
+ : Predicate<"Subtarget->hasStdExtZbc() || Subtarget->hasStdExtZbkc()">,
+ AssemblerPredicate<(any_of FeatureStdExtZbc, FeatureStdExtZbkc),
+ "'Zbc' (Carry-Less Multiplication) or "
+ "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
+
+def FeatureStdExtZknd
+ : SubtargetFeature<"zknd", "HasStdExtZknd", "true",
+ "'Zknd' (NIST Suite: AES Decryption)">;
+def HasStdExtZknd : Predicate<"Subtarget->hasStdExtZknd()">,
+ AssemblerPredicate<(all_of FeatureStdExtZknd),
+ "'Zknd' (NIST Suite: AES Decryption)">;
+
+def FeatureStdExtZkne
+ : SubtargetFeature<"zkne", "HasStdExtZkne", "true",
+ "'Zkne' (NIST Suite: AES Encryption)">;
+def HasStdExtZkne : Predicate<"Subtarget->hasStdExtZkne()">,
+ AssemblerPredicate<(all_of FeatureStdExtZkne),
+ "'Zkne' (NIST Suite: AES Encryption)">;
+
+// Some instructions belong to both the Zknd and Zkne subextensions.
+// They should be enabled if either has been specified.
+def HasStdExtZkndOrZkne
+ : Predicate<"Subtarget->hasStdExtZknd() || Subtarget->hasStdExtZkne()">,
+ AssemblerPredicate<(any_of FeatureStdExtZknd, FeatureStdExtZkne),
+ "'Zknd' (NIST Suite: AES Decryption) or "
+ "'Zkne' (NIST Suite: AES Encryption)">;
+
+def FeatureStdExtZknh
+ : SubtargetFeature<"zknh", "HasStdExtZknh", "true",
+ "'Zknh' (NIST Suite: Hash Function Instructions)">;
+def HasStdExtZknh : Predicate<"Subtarget->hasStdExtZknh()">,
+ AssemblerPredicate<(all_of FeatureStdExtZknh),
+ "'Zknh' (NIST Suite: Hash Function Instructions)">;
+
+def FeatureStdExtZksed
+ : SubtargetFeature<"zksed", "HasStdExtZksed", "true",
+ "'Zksed' (ShangMi Suite: SM4 Block Cipher Instructions)">;
+def HasStdExtZksed : Predicate<"Subtarget->hasStdExtZksed()">,
+ AssemblerPredicate<(all_of FeatureStdExtZksed),
+ "'Zksed' (ShangMi Suite: SM4 Block Cipher Instructions)">;
+
+def FeatureStdExtZksh
+ : SubtargetFeature<"zksh", "HasStdExtZksh", "true",
+ "'Zksh' (ShangMi Suite: SM3 Hash Function Instructions)">;
+def HasStdExtZksh : Predicate<"Subtarget->hasStdExtZksh()">,
+ AssemblerPredicate<(all_of FeatureStdExtZksh),
+ "'Zksh' (ShangMi Suite: SM3 Hash Function Instructions)">;
+
+def FeatureStdExtZkr
+ : SubtargetFeature<"zkr", "HasStdExtZkr", "true",
+ "'Zkr' (Entropy Source Extension)">;
+def HasStdExtZkr : Predicate<"Subtarget->hasStdExtZkr()">,
+ AssemblerPredicate<(all_of FeatureStdExtZkr),
+ "'Zkr' (Entropy Source Extension)">;
+
+def FeatureStdExtZkn
+ : SubtargetFeature<"zkn", "HasStdExtZkn", "true",
+ "'Zkn' (NIST Algorithm Suite)",
+ [FeatureStdExtZbkb,
+ FeatureStdExtZbkc,
+ FeatureStdExtZbkx,
+ FeatureStdExtZkne,
+ FeatureStdExtZknd,
+ FeatureStdExtZknh]>;
+
+def FeatureStdExtZks
+ : SubtargetFeature<"zks", "HasStdExtZks", "true",
+ "'Zks' (ShangMi Algorithm Suite)",
+ [FeatureStdExtZbkb,
+ FeatureStdExtZbkc,
+ FeatureStdExtZbkx,
+ FeatureStdExtZksed,
+ FeatureStdExtZksh]>;
+
+def FeatureStdExtZkt
+ : SubtargetFeature<"zkt", "HasStdExtZkt", "true",
+ "'Zkt' (Data Independent Execution Latency)">;
+
+def FeatureStdExtZk
+ : SubtargetFeature<"zk", "HasStdExtZk", "true",
+ "'Zk' (Standard scalar cryptography extension)",
+ [FeatureStdExtZkn,
+ FeatureStdExtZkr,
+ FeatureStdExtZkt]>;
def FeatureNoRVCHints
: SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false",
@@ -150,23 +284,66 @@ def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">,
AssemblerPredicate<(all_of(not FeatureNoRVCHints)),
"RVC Hint Instructions">;
+def FeatureStdExtZvl32b : SubtargetFeature<"zvl32b", "ZvlLen", "ExtZvl::Zvl32b",
+ "'Zvl' (Minimum Vector Length) 32">;
+
+foreach i = { 6-15 } in {
+ defvar I = !shl(1, i);
+ def FeatureStdExtZvl#I#b :
+ SubtargetFeature<"zvl"#I#"b", "ZvlLen", "ExtZvl::Zvl"#I#"b",
+ "'Zvl' (Minimum Vector Length) "#I,
+ [!cast<SubtargetFeature>("FeatureStdExtZvl"#!srl(I, 1)#"b")]>;
+}
+
+def FeatureStdExtZve32x
+ : SubtargetFeature<"zve32x", "HasStdExtZve32x", "true",
+ "'Zve32x' (Vector Extensions for Embedded Processors "
+ "with maximal 32 EEW)",
+ [FeatureStdExtZvl32b]>;
+
+def FeatureStdExtZve32f
+ : SubtargetFeature<"zve32f", "HasStdExtZve32f", "true",
+ "'Zve32f' (Vector Extensions for Embedded Processors "
+ "with maximal 32 EEW and F extension)",
+ [FeatureStdExtZve32x]>;
+
+def FeatureStdExtZve64x
+ : SubtargetFeature<"zve64x", "HasStdExtZve64x", "true",
+ "'Zve64x' (Vector Extensions for Embedded Processors "
+ "with maximal 64 EEW)", [FeatureStdExtZve32x, FeatureStdExtZvl64b]>;
+
+def FeatureStdExtZve64f
+ : SubtargetFeature<"zve64f", "HasStdExtZve64f", "true",
+ "'Zve64f' (Vector Extensions for Embedded Processors "
+ "with maximal 64 EEW and F extension)",
+ [FeatureStdExtZve32f, FeatureStdExtZve64x]>;
+
+def FeatureStdExtZve64d
+ : SubtargetFeature<"zve64d", "HasStdExtZve64d", "true",
+ "'Zve64d' (Vector Extensions for Embedded Processors "
+ "with maximal 64 EEW, F and D extension)",
+ [FeatureStdExtZve64f]>;
+
def FeatureStdExtV
- : SubtargetFeature<"experimental-v", "HasStdExtV", "true",
- "'V' (Vector Instructions)">;
-def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">,
- AssemblerPredicate<(all_of FeatureStdExtV),
- "'V' (Vector Instructions)">;
-
-def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">;
-def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">;
-
-def FeatureStdExtZvlsseg
- : SubtargetFeature<"experimental-zvlsseg", "HasStdExtZvlsseg", "true",
- "'Zvlsseg' (Vector segment load/store instructions)",
- [FeatureStdExtV]>;
-def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">,
- AssemblerPredicate<(all_of FeatureStdExtZvlsseg),
- "'Zvlsseg' (Vector segment load/store instructions)">;
+ : SubtargetFeature<"v", "HasStdExtV", "true",
+ "'V' (Vector Extension for Application Processors)",
+ [FeatureStdExtZvl128b, FeatureStdExtF, FeatureStdExtD]>;
+
+def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">,
+ AssemblerPredicate<
+ (any_of FeatureStdExtZve32x, FeatureStdExtV),
+ "'V' (Vector Extension for Application Processors), 'Zve32x' or "
+ "'Zve64x' (Vector Extensions for Embedded Processors)">;
+def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
+ AssemblerPredicate<
+ (any_of FeatureStdExtZve64x, FeatureStdExtV),
+ "'V' (Vector Extension for Application Processors) or 'Zve64x' "
+ "(Vector Extensions for Embedded Processors)">;
+def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
+ AssemblerPredicate<
+ (any_of FeatureStdExtZve32f, FeatureStdExtV),
+ "'V' (Vector Extension for Application Processors), 'Zve32f', "
+ "'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;
def Feature64Bit
: SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
@@ -198,6 +375,9 @@ foreach i = {1-31} in
def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore",
"true", "Enable save/restore.">;
+def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
+ "SiFive 7-Series processors">;
+
//===----------------------------------------------------------------------===//
// Named operands for CSR instructions.
//===----------------------------------------------------------------------===//
@@ -226,8 +406,10 @@ def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>;
def : ProcessorModel<"rocket-rv32", RocketModel, []>;
def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>;
-def : ProcessorModel<"sifive-7-rv32", SiFive7Model, []>;
-def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit]>;
+def : ProcessorModel<"sifive-7-rv32", SiFive7Model, [],
+ [TuneSiFive7]>;
+def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit],
+ [TuneSiFive7]>;
def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM,
FeatureStdExtC]>;
@@ -253,7 +435,8 @@ def : ProcessorModel<"sifive-e34", RocketModel, [FeatureStdExtM,
def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
- FeatureStdExtC]>;
+ FeatureStdExtC],
+ [TuneSiFive7]>;
def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit,
FeatureStdExtM,
@@ -277,7 +460,8 @@ def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit,
FeatureStdExtA,
FeatureStdExtF,
FeatureStdExtD,
- FeatureStdExtC]>;
+ FeatureStdExtC],
+ [TuneSiFive7]>;
def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
FeatureStdExtM,
@@ -291,7 +475,8 @@ def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit,
FeatureStdExtA,
FeatureStdExtF,
FeatureStdExtD,
- FeatureStdExtC]>;
+ FeatureStdExtC],
+ [TuneSiFive7]>;
//===----------------------------------------------------------------------===//
// Define the RISC-V target.
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f5d491938050..ad003404d793 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -242,7 +242,8 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
// adjustment, we can not use SP to access the stack objects for the
// arguments. Instead, use BP to access these stack objects.
return (MFI.hasVarSizedObjects() ||
- (!hasReservedCallFrame(MF) && MFI.getMaxCallFrameSize() != 0)) &&
+ (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() ||
+ MFI.getMaxCallFrameSize() != 0))) &&
TRI->hasStackRealignment(MF);
}
@@ -940,11 +941,22 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
}
static bool hasRVVFrameObject(const MachineFunction &MF) {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
- if (MFI.getStackID(I) == TargetStackID::ScalableVector)
- return true;
- return false;
+ // Originally, the function scanned all stack objects to check whether any
+ // scalable vector object was on the stack. However, that caused errors in
+ // the register allocator: as described in issue 53016, it returned false
+ // before RA because there were no RVV stack objects yet, but returned true
+ // after RA because spill slots for RVV values had been created during RA.
+ // Due to this inconsistent behavior, BP was not reserved during register
+ // allocation, yet BP accesses were still generated in the PEI pass.
+ //
+ // The function now uses hasVInstructions() as the return value. It is not
+ // precise, but it keeps register allocation correct.
+ //
+ // FIXME: Find a better way to make the decision or revisit the solution in
+ // D103622.
+ //
+ // Refer to https://github.com/llvm/llvm-project/issues/53016.
+ return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
}
// Do not preserve stack space within the prologue for outgoing variables when the
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index d47bd739235f..ba91b16661a4 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -127,6 +127,41 @@ static std::pair<Value *, Value *> matchStridedConstant(Constant *StartC) {
return std::make_pair(StartVal, Stride);
}
+static std::pair<Value *, Value *> matchStridedStart(Value *Start,
+ IRBuilder<> &Builder) {
+ // Base case, start is a strided constant.
+ auto *StartC = dyn_cast<Constant>(Start);
+ if (StartC)
+ return matchStridedConstant(StartC);
+
+ // Not a constant; maybe it's a strided constant with a splat added to it.
+ auto *BO = dyn_cast<BinaryOperator>(Start);
+ if (!BO || BO->getOpcode() != Instruction::Add)
+ return std::make_pair(nullptr, nullptr);
+
+ // Look for an operand that is splatted.
+ unsigned OtherIndex = 1;
+ Value *Splat = getSplatValue(BO->getOperand(0));
+ if (!Splat) {
+ Splat = getSplatValue(BO->getOperand(1));
+ OtherIndex = 0;
+ }
+ if (!Splat)
+ return std::make_pair(nullptr, nullptr);
+
+ Value *Stride;
+ std::tie(Start, Stride) = matchStridedStart(BO->getOperand(OtherIndex),
+ Builder);
+ if (!Start)
+ return std::make_pair(nullptr, nullptr);
+
+ // Add the splat value to the start.
+ Builder.SetInsertPoint(BO);
+ Builder.SetCurrentDebugLocation(DebugLoc());
+ Start = Builder.CreateAdd(Start, Splat);
+ return std::make_pair(Start, Stride);
+}
+
// Recursively walk back through the use-def chain until we find a Phi with a strided
// start value. Build and update a scalar recurrence as we unwind the recursion.
// We also update the Stride as we unwind. Our goal is to move all of the
@@ -161,12 +196,7 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
if (!Step)
return false;
- // Start should be a strided constant.
- auto *StartC = dyn_cast<Constant>(Start);
- if (!StartC)
- return false;
-
- std::tie(Start, Stride) = matchStridedConstant(StartC);
+ std::tie(Start, Stride) = matchStridedStart(Start, Builder);
if (!Start)
return false;
assert(Stride != nullptr);
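
To make the new matchStridedStart case concrete: a small standalone sketch (not part of the patch; matchStrided, StridedC and Splat are illustrative names only) modelling on plain integer arrays why a strided constant plus a splatted addend is still a strided sequence, with the splat folded into the scalar start:

#include <array>
#include <cstdio>
#include <optional>
#include <utility>

// Return {start, stride} if V[i] == start + i * stride for all i.
static std::optional<std::pair<int, int>>
matchStrided(const std::array<int, 4> &V) {
  int Start = V[0];
  int Stride = V[1] - V[0];
  for (unsigned I = 0; I < V.size(); ++I)
    if (V[I] != Start + (int)I * Stride)
      return std::nullopt;
  return std::make_pair(Start, Stride);
}

int main() {
  std::array<int, 4> StridedC = {0, 2, 4, 6}; // strided constant, stride 2
  int Splat = 3;                              // splatted addend
  std::array<int, 4> StartVec{};
  for (unsigned I = 0; I < StartVec.size(); ++I)
    StartVec[I] = StridedC[I] + Splat;        // {3, 5, 7, 9}
  if (auto M = matchStrided(StartVec))
    printf("start=%d stride=%d\n", M->first, M->second); // start=3 stride=2
  return 0;
}
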
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b24eb5f7bbf4..5870502d74d5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -86,8 +86,12 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
SDValue IntID =
CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
- SDValue Ops[] = {Chain, IntID, StackSlot,
- CurDAG->getRegister(RISCV::X0, MVT::i64), VL};
+ SDValue Ops[] = {Chain,
+ IntID,
+ CurDAG->getUNDEF(VT),
+ StackSlot,
+ CurDAG->getRegister(RISCV::X0, MVT::i64),
+ VL};
SDValue Result = CurDAG->getMemIntrinsicNode(
ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
@@ -125,12 +129,37 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
}
-static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
- const RISCVSubtarget &Subtarget) {
+static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
+ const MVT VT, int64_t Imm,
+ const RISCVSubtarget &Subtarget) {
+ assert(VT == MVT::i64 && "Expecting MVT::i64");
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
+ ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
+ SDValue Addr = TLI->getAddr(CP, *CurDAG);
+ SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
+ // Since there is no data race, the chain can be the entry node.
+ SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
+ CurDAG->getEntryNode());
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+ LLT(VT), CP->getAlign());
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
+ return Load;
+}
+
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+ int64_t Imm, const RISCVSubtarget &Subtarget) {
MVT XLenVT = Subtarget.getXLenVT();
RISCVMatInt::InstSeq Seq =
RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ // If Imm is expensive to build, put it into the constant pool instead.
+ if (Subtarget.useConstantPoolForLargeInts() &&
+ Seq.size() > Subtarget.getMaxBuildIntsCost())
+ return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
+
SDNode *Result = nullptr;
SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
for (RISCVMatInt::Inst &Inst : Seq) {
@@ -372,6 +401,10 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ report_fatal_error("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
@@ -450,6 +483,10 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ report_fatal_error("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
@@ -462,6 +499,75 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
ReplaceNode(Node, Store);
}
+void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
+ if (!Subtarget->hasVInstructions())
+ return;
+
+ assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
+ "Unexpected opcode");
+
+ SDLoc DL(Node);
+ MVT XLenVT = Subtarget->getXLenVT();
+
+ bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
+ unsigned IntNoOffset = HasChain ? 1 : 0;
+ unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
+
+ assert((IntNo == Intrinsic::riscv_vsetvli ||
+ IntNo == Intrinsic::riscv_vsetvlimax ||
+ IntNo == Intrinsic::riscv_vsetvli_opt ||
+ IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
+ "Unexpected vsetvli intrinsic");
+
+ bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
+ IntNo == Intrinsic::riscv_vsetvlimax_opt;
+ unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
+
+ assert(Node->getNumOperands() == Offset + 2 &&
+ "Unexpected number of operands");
+
+ unsigned SEW =
+ RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
+ RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
+ Node->getConstantOperandVal(Offset + 1) & 0x7);
+
+ unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
+ /*MaskAgnostic*/ false);
+ SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+ SmallVector<EVT, 2> VTs = {XLenVT};
+ if (HasChain)
+ VTs.push_back(MVT::Other);
+
+ SDValue VLOperand;
+ unsigned Opcode = RISCV::PseudoVSETVLI;
+ if (VLMax) {
+ VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+ Opcode = RISCV::PseudoVSETVLIX0;
+ } else {
+ VLOperand = Node->getOperand(IntNoOffset + 1);
+
+ if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
+ uint64_t AVL = C->getZExtValue();
+ if (isUInt<5>(AVL)) {
+ SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+ SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
+ if (HasChain)
+ Ops.push_back(Node->getOperand(0));
+ ReplaceNode(
+ Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
+ return;
+ }
+ }
+ }
+
+ SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
+ if (HasChain)
+ Ops.push_back(Node->getOperand(0));
+
+ ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
+}
void RISCVDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we have already selected.
@@ -498,7 +604,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
Imm = SignExtend64(Imm, 32);
- ReplaceNode(Node, selectImm(CurDAG, DL, Imm, *Subtarget));
+ ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
return;
}
case ISD::FrameIndex: {
@@ -509,38 +615,69 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::SRL: {
- // We don't need this transform if zext.h is supported.
- if (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())
+ // Optimize (srl (and X, C2), C) ->
+ //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
+ // where C2 is a mask with C3 trailing ones.
+ // This accounts for C2 possibly having had its lower bits cleared by
+ // SimplifyDemandedBits, and avoids materializing the C2 immediate.
+ // This pattern occurs when type legalizing right shifts for types with
+ // less than XLen bits.
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N0.getOperand(1)))
+ break;
+ unsigned ShAmt = N1C->getZExtValue();
+ uint64_t Mask = N0.getConstantOperandVal(1);
+ Mask |= maskTrailingOnes<uint64_t>(ShAmt);
+ if (!isMask_64(Mask))
+ break;
+ unsigned TrailingOnes = countTrailingOnes(Mask);
+ // 32 trailing ones should use srliw via tablegen pattern.
+ if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
break;
- // Optimize (srl (and X, 0xffff), C) ->
- // (srli (slli X, (XLen-16), (XLen-16) + C)
- // Taking into account that the 0xffff may have had lower bits unset by
- // SimplifyDemandedBits. This avoids materializing the 0xffff immediate.
- // This pattern occurs when type legalizing i16 right shifts.
- // FIXME: This could be extended to other AND masks.
+ unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(LShAmt, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
+ ReplaceNode(Node, SRLI);
+ return;
+ }
+ case ISD::SRA: {
+ // Optimize (sra (sext_inreg X, i16), C) ->
+ //          (srai (slli X, (XLen-16)), (XLen-16) + C)
+ // And (sra (sext_inreg X, i8), C) ->
+ //          (srai (slli X, (XLen-8)), (XLen-8) + C)
+ // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
+ // This transform matches the code we get without Zbb. The shifts are more
+ // compressible, and this can help expose CSE opportunities in the sdiv by
+ // constant optimization.
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
- if (N1C) {
- uint64_t ShAmt = N1C->getZExtValue();
- SDValue N0 = Node->getOperand(0);
- if (ShAmt < 16 && N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
- uint64_t Mask = N0.getConstantOperandVal(1);
- Mask |= maskTrailingOnes<uint64_t>(ShAmt);
- if (Mask == 0xffff) {
- unsigned LShAmt = Subtarget->getXLen() - 16;
- SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
- CurDAG->getTargetConstant(LShAmt, DL, VT));
- SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
- ReplaceNode(Node, SRLI);
- return;
- }
- }
- }
-
- break;
+ if (!N1C)
+ break;
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
+ break;
+ unsigned ShAmt = N1C->getZExtValue();
+ unsigned ExtSize =
+ cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
+ // ExtSize of 32 should use sraiw via tablegen pattern.
+ if (ExtSize >= 32 || ShAmt >= ExtSize)
+ break;
+ unsigned LShAmt = Subtarget->getXLen() - ExtSize;
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(LShAmt, DL, VT));
+ SDNode *SRAI = CurDAG->getMachineNode(
+ RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
+ ReplaceNode(Node, SRAI);
+ return;
}
case ISD::AND: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
@@ -774,7 +911,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ShiftedC1 = SignExtend64(ShiftedC1, 32);
// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
- SDNode *Imm = selectImm(CurDAG, DL, ShiftedC1, *Subtarget);
+ SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
CurDAG->getTargetConstant(LeadingZeros, DL, VT));
@@ -793,62 +930,52 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::riscv_vmsge: {
SDValue Src1 = Node->getOperand(1);
SDValue Src2 = Node->getOperand(2);
+ bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
+ bool IsCmpUnsignedZero = false;
// Only custom select scalar second operand.
if (Src2.getValueType() != XLenVT)
break;
// Small constants are handled with patterns.
if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
int64_t CVal = C->getSExtValue();
- if (CVal >= -15 && CVal <= 16)
- break;
+ if (CVal >= -15 && CVal <= 16) {
+ if (!IsUnsigned || CVal != 0)
+ break;
+ IsCmpUnsignedZero = true;
+ }
}
- bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
MVT Src1VT = Src1.getSimpleValueType();
- unsigned VMSLTOpcode, VMNANDOpcode;
+ unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
- case RISCVII::VLMUL::LMUL_F8:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF8 : RISCV::PseudoVMSLT_VX_MF8;
- VMNANDOpcode = RISCV::PseudoVMNAND_MM_MF8;
- break;
- case RISCVII::VLMUL::LMUL_F4:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF4 : RISCV::PseudoVMSLT_VX_MF4;
- VMNANDOpcode = RISCV::PseudoVMNAND_MM_MF4;
- break;
- case RISCVII::VLMUL::LMUL_F2:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF2 : RISCV::PseudoVMSLT_VX_MF2;
- VMNANDOpcode = RISCV::PseudoVMNAND_MM_MF2;
- break;
- case RISCVII::VLMUL::LMUL_1:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M1 : RISCV::PseudoVMSLT_VX_M1;
- VMNANDOpcode = RISCV::PseudoVMNAND_MM_M1;
- break;
- case RISCVII::VLMUL::LMUL_2:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M2 : RISCV::PseudoVMSLT_VX_M2;
- VMNANDOpcode = RISCV::PseudoVMNAND_MM_M2;
- break;
- case RISCVII::VLMUL::LMUL_4:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M4 : RISCV::PseudoVMSLT_VX_M4;
- VMNANDOpcode = RISCV::PseudoVMNAND_MM_M4;
- break;
- case RISCVII::VLMUL::LMUL_8:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M8 : RISCV::PseudoVMSLT_VX_M8;
- VMNANDOpcode = RISCV::PseudoVMNAND_MM_M8;
- break;
+#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
+ case RISCVII::VLMUL::lmulenum: \
+ VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
+ : RISCV::PseudoVMSLT_VX_##suffix; \
+ VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
+ VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
+ break;
+ CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
+ CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
+ CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
+ CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
+ CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
+ CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
+ CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
+#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
}
SDValue SEW = CurDAG->getTargetConstant(
Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
SDValue VL;
selectVLOp(Node->getOperand(3), VL);
+ // If this is vmsgeu with a 0 immediate, expand it to vmset.
+ if (IsCmpUnsignedZero) {
+ ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
+ return;
+ }
+
// Expand to
// vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
SDValue Cmp = SDValue(
@@ -862,96 +989,61 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::riscv_vmsge_mask: {
SDValue Src1 = Node->getOperand(2);
SDValue Src2 = Node->getOperand(3);
+ bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
+ bool IsCmpUnsignedZero = false;
// Only custom select scalar second operand.
if (Src2.getValueType() != XLenVT)
break;
// Small constants are handled with patterns.
if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
int64_t CVal = C->getSExtValue();
- if (CVal >= -15 && CVal <= 16)
- break;
+ if (CVal >= -15 && CVal <= 16) {
+ if (!IsUnsigned || CVal != 0)
+ break;
+ IsCmpUnsignedZero = true;
+ }
}
- bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
MVT Src1VT = Src1.getSimpleValueType();
- unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode;
+ unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
+ VMSetOpcode, VMANDOpcode;
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
- case RISCVII::VLMUL::LMUL_F8:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF8 : RISCV::PseudoVMSLT_VX_MF8;
- VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF8_MASK
- : RISCV::PseudoVMSLT_VX_MF8_MASK;
- break;
- case RISCVII::VLMUL::LMUL_F4:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF4 : RISCV::PseudoVMSLT_VX_MF4;
- VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF4_MASK
- : RISCV::PseudoVMSLT_VX_MF4_MASK;
- break;
- case RISCVII::VLMUL::LMUL_F2:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF2 : RISCV::PseudoVMSLT_VX_MF2;
- VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_MF2_MASK
- : RISCV::PseudoVMSLT_VX_MF2_MASK;
- break;
- case RISCVII::VLMUL::LMUL_1:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M1 : RISCV::PseudoVMSLT_VX_M1;
- VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M1_MASK
- : RISCV::PseudoVMSLT_VX_M1_MASK;
- break;
- case RISCVII::VLMUL::LMUL_2:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M2 : RISCV::PseudoVMSLT_VX_M2;
- VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M2_MASK
- : RISCV::PseudoVMSLT_VX_M2_MASK;
- break;
- case RISCVII::VLMUL::LMUL_4:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M4 : RISCV::PseudoVMSLT_VX_M4;
- VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M4_MASK
- : RISCV::PseudoVMSLT_VX_M4_MASK;
- break;
- case RISCVII::VLMUL::LMUL_8:
- VMSLTOpcode =
- IsUnsigned ? RISCV::PseudoVMSLTU_VX_M8 : RISCV::PseudoVMSLT_VX_M8;
- VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_M8_MASK
- : RISCV::PseudoVMSLT_VX_M8_MASK;
- break;
+#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
+ case RISCVII::VLMUL::lmulenum: \
+ VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
+ : RISCV::PseudoVMSLT_VX_##suffix; \
+ VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
+ : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
+ VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
+ break;
+ CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1)
+ CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2)
+ CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4)
+ CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8)
+ CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16)
+ CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32)
+ CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64)
+#undef CASE_VMSLT_VMSET_OPCODES
}
// Mask operations use the LMUL from the mask type.
switch (RISCVTargetLowering::getLMUL(VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
- case RISCVII::VLMUL::LMUL_F8:
- VMXOROpcode = RISCV::PseudoVMXOR_MM_MF8;
- VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF8;
- break;
- case RISCVII::VLMUL::LMUL_F4:
- VMXOROpcode = RISCV::PseudoVMXOR_MM_MF4;
- VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF4;
- break;
- case RISCVII::VLMUL::LMUL_F2:
- VMXOROpcode = RISCV::PseudoVMXOR_MM_MF2;
- VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF2;
- break;
- case RISCVII::VLMUL::LMUL_1:
- VMXOROpcode = RISCV::PseudoVMXOR_MM_M1;
- VMANDNOpcode = RISCV::PseudoVMANDN_MM_M1;
- break;
- case RISCVII::VLMUL::LMUL_2:
- VMXOROpcode = RISCV::PseudoVMXOR_MM_M2;
- VMANDNOpcode = RISCV::PseudoVMANDN_MM_M2;
- break;
- case RISCVII::VLMUL::LMUL_4:
- VMXOROpcode = RISCV::PseudoVMXOR_MM_M4;
- VMANDNOpcode = RISCV::PseudoVMANDN_MM_M4;
- break;
- case RISCVII::VLMUL::LMUL_8:
- VMXOROpcode = RISCV::PseudoVMXOR_MM_M8;
- VMANDNOpcode = RISCV::PseudoVMANDN_MM_M8;
- break;
+#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \
+ case RISCVII::VLMUL::lmulenum: \
+ VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
+ VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \
+ break;
+ CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8)
+ CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4)
+ CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2)
+ CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1)
+ CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2)
+ CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4)
+ CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8)
+#undef CASE_VMXOR_VMANDN_VMAND_OPCODES
}
SDValue SEW = CurDAG->getTargetConstant(
Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
@@ -960,6 +1052,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
selectVLOp(Node->getOperand(5), VL);
SDValue MaskedOff = Node->getOperand(1);
SDValue Mask = Node->getOperand(4);
+
+ // If this is vmsgeu_mask with a 0 immediate, expand it to {vmset, vmand}.
+ if (IsCmpUnsignedZero) {
+ SDValue VMSet =
+ SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0);
+ ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT,
+ {Mask, VMSet, VL, MaskSEW}));
+ return;
+ }
+
// If the MaskedOff value and the Mask are the same value use
// vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
// This avoids needing to copy v0 to vd before starting the next sequence.
@@ -988,6 +1090,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
{Cmp, Mask, VL, MaskSEW}));
return;
}
+ case Intrinsic::riscv_vsetvli_opt:
+ case Intrinsic::riscv_vsetvlimax_opt:
+ return selectVSETVLI(Node);
}
break;
}
@@ -997,54 +1102,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// By default we do not custom select any intrinsic.
default:
break;
-
case Intrinsic::riscv_vsetvli:
- case Intrinsic::riscv_vsetvlimax: {
- if (!Subtarget->hasVInstructions())
- break;
-
- bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
- unsigned Offset = VLMax ? 2 : 3;
-
- assert(Node->getNumOperands() == Offset + 2 &&
- "Unexpected number of operands");
-
- unsigned SEW =
- RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
- RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
- Node->getConstantOperandVal(Offset + 1) & 0x7);
-
- unsigned VTypeI = RISCVVType::encodeVTYPE(
- VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
- SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
-
- SDValue VLOperand;
- unsigned Opcode = RISCV::PseudoVSETVLI;
- if (VLMax) {
- VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
- Opcode = RISCV::PseudoVSETVLIX0;
- } else {
- VLOperand = Node->getOperand(2);
-
- if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
- uint64_t AVL = C->getZExtValue();
- if (isUInt<5>(AVL)) {
- SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
- ReplaceNode(
- Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
- MVT::Other, VLImm, VTypeIOp,
- /* Chain */ Node->getOperand(0)));
- return;
- }
- }
- }
-
- ReplaceNode(Node,
- CurDAG->getMachineNode(Opcode, DL, XLenVT,
- MVT::Other, VLOperand, VTypeIOp,
- /* Chain */ Node->getOperand(0)));
- return;
- }
+ case Intrinsic::riscv_vsetvlimax:
+ return selectVSETVLI(Node);
case Intrinsic::riscv_vlseg2:
case Intrinsic::riscv_vlseg3:
case Intrinsic::riscv_vlseg4:
@@ -1154,9 +1214,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
unsigned CurOp = 2;
+ // Masked intrinsics only have TU versions of the pseudo instructions.
+ bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
SmallVector<SDValue, 8> Operands;
- if (IsMasked)
+ if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
+ else
+ // Skip the undef passthru operand for nomask TA version pseudo
+ CurOp++;
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
@@ -1169,8 +1234,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ report_fatal_error("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
- IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1195,16 +1264,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
unsigned CurOp = 2;
+ // The riscv_vlm intrinsic is always tail agnostic and has no passthru operand.
+ bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
+ // Masked intrinsics only have TU versions of the pseudo instructions.
+ bool IsTU =
+ HasPassthruOperand &&
+ ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
SmallVector<SDValue, 8> Operands;
- if (IsMasked)
+ if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
+ else if (HasPassthruOperand)
+ // Skip the undef passthru operand for nomask TA version pseudo
+ CurOp++;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
Operands, /*IsLoad=*/true);
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
- RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
+ RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1223,9 +1301,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
unsigned CurOp = 2;
+ // Masked intrinsics only have TU versions of the pseudo instructions.
+ bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
SmallVector<SDValue, 7> Operands;
- if (IsMasked)
+ if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
+ else
+ // Skip the undef passthru operand for nomask TA version pseudo
+ CurOp++;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ false, Operands,
@@ -1233,8 +1316,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
- RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, Log2SEW,
- static_cast<unsigned>(LMUL));
+ RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
+ Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
MVT::Other, MVT::Glue, Operands);
@@ -1359,9 +1442,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
+ if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
+ report_fatal_error("The V extension does not support EEW=64 for index "
+ "values when XLEN=32");
+ }
const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
- IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
- static_cast<unsigned>(IndexLMUL));
+ IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
+ static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
MachineSDNode *Store =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1516,10 +1603,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::SPLAT_VECTOR:
+ case RISCVISD::VMV_S_X_VL:
+ case RISCVISD::VFMV_S_F_VL:
case RISCVISD::VMV_V_X_VL:
case RISCVISD::VFMV_V_F_VL: {
// Try to match splat of a scalar load to a strided load with stride of x0.
- SDValue Src = Node->getOperand(0);
+ bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
+ Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
+ if (IsScalarMove && !Node->getOperand(0).isUndef())
+ break;
+ SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0);
auto *Ld = dyn_cast<LoadSDNode>(Src);
if (!Ld)
break;
@@ -1534,7 +1627,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue VL;
if (Node->getOpcode() == ISD::SPLAT_VECTOR)
VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
- else
+ else if (IsScalarMove) {
+ // We could handle more VL values if the VSETVLI insertion pass were
+ // updated to avoid introducing extra VSETVLIs.
+ if (!isOneConstant(Node->getOperand(2)))
+ break;
+ selectVLOp(Node->getOperand(2), VL);
+ } else
selectVLOp(Node->getOperand(1), VL);
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
@@ -1546,8 +1645,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
- /*IsMasked*/ false, /*IsStrided*/ true, /*FF*/ false, Log2SEW,
- static_cast<unsigned>(LMUL));
+ /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
+ Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1727,6 +1826,20 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
return false;
break;
+ case RISCV::ANDI:
+ if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
+ return false;
+ break;
+ case RISCV::SEXTB:
+ if (Bits < 8)
+ return false;
+ break;
+ case RISCV::SEXTH:
+ case RISCV::ZEXTH_RV32:
+ case RISCV::ZEXTH_RV64:
+ if (Bits < 16)
+ return false;
+ break;
case RISCV::ADDUW:
case RISCV::SH1ADDUW:
case RISCV::SH2ADDUW:
@@ -1758,7 +1871,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
// allows us to choose between VSETIVLI and VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
auto *C = dyn_cast<ConstantSDNode>(N);
- if (C && isUInt<5>(C->getZExtValue()))
+ if (C && (isUInt<5>(C->getZExtValue()) ||
+ C->getSExtValue() == RISCV::VLMaxSentinel))
VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
N->getValueType(0));
else
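
Returning to the (srl (and X, C2), C) rewrite added to Select() earlier in this file: a brute-force standalone sketch (not part of the patch; the mask, shift amount and test values are illustrative choices) that checks the equivalence for C2 = 0x3fff (14 trailing ones) and C = 3 on RV64, and shows why no extra instruction is needed to materialize the mask:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned ShAmt = 3;                   // C, the original srl amount
  const uint64_t Mask = 0x3fff;               // C2, a mask with 14 trailing ones
  const unsigned TrailingOnes = 14;           // C3
  const unsigned LShAmt = 64 - TrailingOnes;  // 50
  const uint64_t Tests[] = {0, 1, 0x3fff, 0xdeadbeefcafef00dULL, ~0ULL};
  for (uint64_t X : Tests) {
    uint64_t AndSrl = (X & Mask) >> ShAmt;                  // (srl (and X, C2), C)
    uint64_t SlliSrli = (X << LShAmt) >> (LShAmt + ShAmt);  // (srli (slli X, 50), 53)
    assert(AndSrl == SlliSrli && "rewrite must preserve the value");
    printf("%016llx -> %llx\n", (unsigned long long)X,
           (unsigned long long)SlliSrli);
  }
  return 0;
}
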
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index a2770089995d..c429a9298739 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -87,6 +87,8 @@ public:
void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
+ void selectVSETVLI(SDNode *Node);
+
// Return the RISC-V condition code that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch).
@@ -159,6 +161,7 @@ struct VSXSEGPseudo {
struct VLEPseudo {
uint16_t Masked : 1;
+ uint16_t IsTU : 1;
uint16_t Strided : 1;
uint16_t FF : 1;
uint16_t Log2SEW : 3;
@@ -176,6 +179,7 @@ struct VSEPseudo {
struct VLX_VSXPseudo {
uint16_t Masked : 1;
+ uint16_t IsTU : 1;
uint16_t Ordered : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f5512e6fb37..5cc3aa35d4d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -249,7 +250,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
- if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
+ if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
+ Subtarget.hasStdExtZbkb()) {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::ROTR, MVT::i32, Custom);
@@ -277,7 +279,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
- Subtarget.hasStdExtZbb() ? Legal : Expand);
+ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
+ ? Legal
+ : Expand);
}
if (Subtarget.hasStdExtZbb()) {
@@ -330,6 +334,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f16, Legal);
setOperationAction(ISD::LROUND, MVT::f16, Legal);
setOperationAction(ISD::LLROUND, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
@@ -338,6 +346,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
@@ -363,6 +373,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
+ // complete support for all operations in LegalizeDAG.
+
// We need to custom promote this.
if (Subtarget.is64Bit())
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
@@ -375,12 +388,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
setOperationAction(ISD::LROUND, MVT::f32, Legal);
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -402,6 +421,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
setOperationAction(ISD::LROUND, MVT::f64, Legal);
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
@@ -410,6 +433,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
@@ -499,12 +524,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
- ISD::VP_SELECT};
+ ISD::VP_MERGE, ISD::VP_SELECT};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT};
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
+ ISD::VP_SELECT};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -546,6 +572,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::VP_MERGE, VT, Expand);
+ setOperationAction(ISD::VP_SELECT, VT, Expand);
setOperationAction(ISD::VP_AND, VT, Custom);
setOperationAction(ISD::VP_OR, VT, Custom);
@@ -590,6 +618,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ // nxvXi64 MULHS/MULHU require the V extension instead of Zve64*.
+ if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
+ }
+
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
@@ -886,8 +920,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
+ // vXi64 MULHS/MULHU require the V extension instead of Zve64*.
+ if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
+ setOperationAction(ISD::MULHS, VT, Custom);
+ setOperationAction(ISD::MULHU, VT, Custom);
+ }
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
@@ -1002,9 +1039,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
- setOperationAction(ISD::BITCAST, MVT::f16, Custom);
- setOperationAction(ISD::BITCAST, MVT::f32, Custom);
- setOperationAction(ISD::BITCAST, MVT::f64, Custom);
+ if (Subtarget.hasStdExtZfh())
+ setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+ if (Subtarget.hasStdExtF())
+ setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+ if (Subtarget.hasStdExtD())
+ setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}
}
@@ -1024,7 +1064,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
+ if (Subtarget.hasStdExtF()) {
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+ setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
+ }
if (Subtarget.hasVInstructions()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
@@ -1072,7 +1118,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::riscv_masked_cmpxchg_i32: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(4);
@@ -1158,10 +1204,11 @@ bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
// Zexts are free if they can be combined with a load.
+ // Don't advertise i32->i64 zextload as being free for RV64. It interacts
+ // poorly with type legalization of compares, which prefers sign extension.
if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
EVT MemVT = LD->getMemoryVT();
- if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
- (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
+ if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
(LD->getExtensionType() == ISD::NON_EXTLOAD ||
LD->getExtensionType() == ISD::ZEXTLOAD))
return true;
@@ -1189,7 +1236,9 @@ bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
if (VT.isVector())
return false;
- return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y);
+ return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
+ Subtarget.hasStdExtZbkb()) &&
+ !isa<ConstantSDNode>(Y);
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
@@ -1230,6 +1279,30 @@ bool RISCVTargetLowering::shouldSinkOperands(
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
return Operand == 0 || Operand == 1;
+ // FIXME: Our patterns can only match vx/vf instructions when the splat
+ // is on the RHS, because TableGen doesn't recognize our VP operations
+ // as commutative.
+ case Intrinsic::vp_add:
+ case Intrinsic::vp_mul:
+ case Intrinsic::vp_and:
+ case Intrinsic::vp_or:
+ case Intrinsic::vp_xor:
+ case Intrinsic::vp_fadd:
+ case Intrinsic::vp_fmul:
+ case Intrinsic::vp_shl:
+ case Intrinsic::vp_lshr:
+ case Intrinsic::vp_ashr:
+ case Intrinsic::vp_udiv:
+ case Intrinsic::vp_sdiv:
+ case Intrinsic::vp_urem:
+ case Intrinsic::vp_srem:
+ return Operand == 1;
+ // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
+ // explicit patterns for both LHS and RHS (as 'vr' versions).
+ case Intrinsic::vp_sub:
+ case Intrinsic::vp_fsub:
+ case Intrinsic::vp_fdiv:
+ return Operand == 0 || Operand == 1;
default:
return false;
}
@@ -1277,8 +1350,6 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
if (VT == MVT::f64 && !Subtarget.hasStdExtD())
return false;
- if (Imm.isNegZero())
- return false;
return Imm.isZero();
}
@@ -1482,6 +1553,19 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
return false;
}
+static SDValue getVLOperand(SDValue Op) {
+ assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
+ "Unexpected opcode");
+ bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
+ unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
+ const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
+ RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
+ if (!II)
+ return SDValue();
+ return Op.getOperand(II->VLOperand + 1 + HasChain);
+}
+
static bool useRVVForFixedLengthVectorVT(MVT VT,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
@@ -1667,7 +1751,8 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
return false;
}
-static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
+static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
// RISCV FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan. We can use a conversion instruction and fix the
// nan case with a compare and a select.
@@ -1679,15 +1764,17 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
unsigned Opc;
if (SatVT == DstVT)
- Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
+ Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
else if (DstVT == MVT::i64 && SatVT == MVT::i32)
- Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
+ Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
else
return SDValue();
// FIXME: Support other SatVTs by clamping before or after the conversion.
SDLoc DL(Op);
- SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src);
+ SDValue FpToInt = DAG.getNode(
+ Opc, DL, DstVT, Src,
+ DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
@@ -1898,6 +1985,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits =
std::min(std::max(NumElts, 8u), Subtarget.getXLen());
+ NumViaIntegerBits = std::min(NumViaIntegerBits,
+ Subtarget.getMaxELENForFixedLengthVectors());
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
// If we have to use more than one INSERT_VECTOR_ELT then this
// optimization is likely to increase code size; avoid performing it in
@@ -2190,6 +2279,17 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
// node in order to try and match RVV vector/scalar instructions.
if ((LoC >> 31) == HiC)
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
+
+ // If vl equals VLMax and the Hi constant equals Lo, we can lower this with
+ // a vmv.v.x whose EEW = 32.
+ auto *Const = dyn_cast<ConstantSDNode>(VL);
+ if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
+ MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+ // TODO: if vl <= min(VLMAX), we can also do this, but we don't currently
+ // have access to the Subtarget here.
+ auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
+ return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
+ }
}
// Fall back to a stack store and stride x0 vector load.
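The EEW=32 path added above relies on a reinterpretation identity: when the two 32-bit halves of the i64 value are equal, replicating the 32-bit pattern across twice as many i32 lanes and bitcasting back yields the desired i64 splat. A minimal standalone sketch of that identity (plain C++, not code from this patch; values chosen arbitrarily):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const std::uint32_t Lo = 0xDEADBEEF;                       // the Lo == Hi half
  const std::uint64_t Want = (std::uint64_t(Lo) << 32) | Lo; // desired i64 splat

  std::uint32_t I32Lanes[4];                  // stands in for vmv.v.x with EEW=32
  for (std::uint32_t &L : I32Lanes)
    L = Lo;

  std::uint64_t I64Lanes[2];                  // stands in for the bitcast result
  std::memcpy(I64Lanes, I32Lanes, sizeof(I64Lanes));
  for (std::uint64_t L : I64Lanes)
    assert(L == Want);                        // every i64 lane is the full splat
  return 0;
}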
@@ -2215,8 +2315,13 @@ static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- if (VT.isFloatingPoint())
+ if (VT.isFloatingPoint()) {
+ // If VL is 1, we could use vfmv.s.f.
+ if (isOneConstant(VL))
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
+ Scalar, VL);
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
+ }
MVT XLenVT = Subtarget.getXLenVT();
@@ -2229,16 +2334,98 @@ static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+ // If VL is 1 and the scalar value won't benefit from an immediate, we could
+ // use vmv.s.x.
+ if (isOneConstant(VL) &&
+ (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
+ VL);
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
}
assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
"Unexpected scalar for splat lowering!");
+ if (isOneConstant(VL) && isNullConstant(Scalar))
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
+ DAG.getConstant(0, DL, XLenVT), VL);
+
// Otherwise use the more complicated splatting algorithm.
return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
}
+// Is the mask a slidedown that shifts in undefs?
+static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
+ int Size = Mask.size();
+
+ // Elements shifted in should be undef.
+ auto CheckUndefs = [&](int Shift) {
+ for (int i = Size - Shift; i != Size; ++i)
+ if (Mask[i] >= 0)
+ return false;
+ return true;
+ };
+
+ // Elements should be shifted or undef.
+ auto MatchShift = [&](int Shift) {
+ for (int i = 0; i != Size - Shift; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Shift + i)
+ return false;
+ return true;
+ };
+
+ // Try all possible shifts.
+ for (int Shift = 1; Shift != Size; ++Shift)
+ if (CheckUndefs(Shift) && MatchShift(Shift))
+ return Shift;
+
+ // No match.
+ return -1;
+}
+
+static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
+ const RISCVSubtarget &Subtarget) {
+ // We need to be able to widen elements to the next larger integer type.
+ if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
+ return false;
+
+ int Size = Mask.size();
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ int Srcs[] = {-1, -1};
+ for (int i = 0; i != Size; ++i) {
+ // Ignore undef elements.
+ if (Mask[i] < 0)
+ continue;
+
+ // Is this an even or odd element?
+ int Pol = i % 2;
+
+ // Ensure we consistently use the same source for this element polarity.
+ int Src = Mask[i] / Size;
+ if (Srcs[Pol] < 0)
+ Srcs[Pol] = Src;
+ if (Srcs[Pol] != Src)
+ return false;
+
+ // Make sure the element within the source is appropriate for this element
+ // in the destination.
+ int Elt = Mask[i] % Size;
+ if (Elt != i / 2)
+ return false;
+ }
+
+ // We need to find a source for each polarity and they can't be the same.
+ if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
+ return false;
+
+ // Swap the sources if the second source was in the even polarity.
+ SwapSources = Srcs[0] > Srcs[1];
+
+ return true;
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
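matchShuffleAsSlideDown above accepts a mask when every kept lane comes from a fixed number of positions higher and all lanes shifted in from the top are undef. A minimal standalone sketch of the same check on plain std::vector masks (illustrative C++, not LLVM code):

#include <cassert>
#include <vector>

static int matchSlideDown(const std::vector<int> &Mask) {
  const int Size = (int)Mask.size();
  for (int Shift = 1; Shift != Size; ++Shift) {
    bool Ok = true;
    // Lanes shifted in from the top must be undef (-1).
    for (int i = Size - Shift; i != Size; ++i)
      Ok &= Mask[i] < 0;
    // Every kept lane is undef or comes from Shift positions higher.
    for (int i = 0; i != Size - Shift; ++i)
      Ok &= Mask[i] < 0 || Mask[i] == Shift + i;
    if (Ok)
      return Shift;
  }
  return -1; // no single slide-down reproduces this mask
}

int main() {
  assert(matchSlideDown({2, 3, -1, -1}) == 2); // <2,3,u,u> is a slidedown by 2
  assert(matchSlideDown({1, 2, 3, 0}) == -1);  // a rotate is not a slidedown
  return 0;
}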
@@ -2284,8 +2471,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue IntID =
DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
- SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
- DAG.getRegister(RISCV::X0, XLenVT), VL};
+ SDValue Ops[] = {Ld->getChain(),
+ IntID,
+ DAG.getUNDEF(ContainerVT),
+ NewAddr,
+ DAG.getRegister(RISCV::X0, XLenVT),
+ VL};
SDValue NewLoad = DAG.getMemIntrinsicNode(
ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
DAG.getMachineFunction().getMachineMemOperand(
@@ -2324,10 +2515,97 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
}
}
+ ArrayRef<int> Mask = SVN->getMask();
+
+ // Try to match as a slidedown.
+ int SlideAmt = matchShuffleAsSlideDown(Mask);
+ if (SlideAmt >= 0) {
+ // TODO: Should we reduce the VL to account for the upper undef elements?
+ // Requires additional vsetvlis, but might be faster to execute.
+ V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+ SDValue SlideDown =
+ DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), V1,
+ DAG.getConstant(SlideAmt, DL, XLenVT),
+ TrueMask, VL);
+ return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
+ }
+
+ // Detect an interleave shuffle and lower to
+ // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
+ bool SwapSources;
+ if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
+ // Swap sources if needed.
+ if (SwapSources)
+ std::swap(V1, V2);
+
+ // Extract the lower half of the vectors.
+ MVT HalfVT = VT.getHalfNumVectorElementsVT();
+ V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getConstant(0, DL, XLenVT));
+ V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
+ DAG.getConstant(0, DL, XLenVT));
+
+ // Double the element width and halve the number of elements in an int type.
+ unsigned EltBits = VT.getScalarSizeInBits();
+ MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
+ MVT WideIntVT =
+ MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
+ // Convert this to a scalable vector. We need to base this on the
+ // destination size to ensure there's always a type with a smaller LMUL.
+ MVT WideIntContainerVT =
+ getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
+
+ // Convert sources to scalable vectors with the same element count as the
+ // larger type.
+ MVT HalfContainerVT = MVT::getVectorVT(
+ VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
+ V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
+ V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
+
+ // Cast sources to integer.
+ MVT IntEltVT = MVT::getIntegerVT(EltBits);
+ MVT IntHalfVT =
+ MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
+ V1 = DAG.getBitcast(IntHalfVT, V1);
+ V2 = DAG.getBitcast(IntHalfVT, V2);
+
+ // Freeze V2 since we use it twice and we need to be sure that the add and
+ // multiply see the same value.
+ V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
+
+ // Recreate TrueMask using the widened type's element count.
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
+ TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+
+ // Widen V1 and V2 with 0s and add one copy of V2 to V1.
+ SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
+ V2, TrueMask, VL);
+ // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
+ SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
+ DAG.getAllOnesConstant(DL, XLenVT));
+ SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
+ V2, Multiplier, TrueMask, VL);
+ // Add the new copies to our previous addition giving us 2^eltbits copies of
+ // V2. This is equivalent to shifting V2 left by eltbits. This should
+ // combine with the vwmulu.vv above to form vwmaccu.vv.
+ Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
+ TrueMask, VL);
+ // Cast back to ContainerVT. We may need a new ContainerVT in case
+ // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
+ // vector VT.
+ ContainerVT =
+ MVT::getVectorVT(VT.getVectorElementType(),
+ WideIntContainerVT.getVectorElementCount() * 2);
+ Add = DAG.getBitcast(ContainerVT, Add);
+ return convertFromScalableVector(VT, Add, DAG, Subtarget);
+ }
+
// Detect shuffles which can be re-expressed as vector selects; these are
// shuffles in which each element in the destination is taken from an element
// at the corresponding index in either source vectors.
- bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
+ bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
int MaskIndex = MaskIdx.value();
return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
});
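The interleave lowering above is built on the identity zext(a) + zext(b) + zext(b) * (2^eltbits - 1) == zext(a) + (zext(b) << eltbits): each widened lane ends up with the V1 element in its low half and the V2 element in its high half, so a bitcast back to the narrow element type produces the interleave. A minimal standalone sketch for 8-bit elements (plain C++, not LLVM code; assumes a little-endian host, matching RISC-V):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const std::uint8_t A[4] = {1, 2, 3, 4}; // source of the even result lanes
  const std::uint8_t B[4] = {5, 6, 7, 8}; // source of the odd result lanes

  std::uint16_t Wide[4];
  for (int i = 0; i != 4; ++i)            // the vwaddu.vv + vwmaccu.vv pattern
    Wide[i] = static_cast<std::uint16_t>(A[i] + B[i] + B[i] * 0xFF);

  std::uint8_t Interleaved[8];
  std::memcpy(Interleaved, Wide, sizeof(Interleaved)); // the final bitcast
  const std::uint8_t Expected[8] = {1, 5, 2, 6, 3, 7, 4, 8};
  assert(std::memcmp(Interleaved, Expected, sizeof(Expected)) == 0);
  return 0;
}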
@@ -2353,7 +2631,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// Now construct the mask that will be used by the vselect or blended
// vrgather operation. For vrgathers, construct the appropriate indices into
// each vector.
- for (int MaskIndex : SVN->getMask()) {
+ for (int MaskIndex : Mask) {
bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
if (!IsSelect) {
@@ -2691,15 +2969,25 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
MVT VT = Op.getSimpleValueType();
assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
SDLoc DL(Op);
- if (Op.getOperand(2).getOpcode() == ISD::Constant)
- return Op;
// FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
- // use log(XLen) bits. Mask the shift amount accordingly.
+ // use log2(XLen) bits. Mask the shift amount accordingly to prevent
+ // accidentally setting the extra bit.
unsigned ShAmtWidth = Subtarget.getXLen() - 1;
SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
DAG.getConstant(ShAmtWidth, DL, VT));
- unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
- return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
+ // fshl and fshr concatenate their operands in the same order, but the fsl
+ // and fsr instructions use different orders. fshl returns its first operand
+ // for a shift of zero, while fshr returns its second operand. fsl and fsr
+ // both return rs1, so the ISD nodes need different operand orders.
+ // Shift amount is in rs2.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ unsigned Opc = RISCVISD::FSL;
+ if (Op.getOpcode() == ISD::FSHR) {
+ std::swap(Op0, Op1);
+ Opc = RISCVISD::FSR;
+ }
+ return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
}
case ISD::TRUNCATE: {
SDLoc DL(Op);
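The operand swap above follows from the zero-shift behaviour spelled out in the comment: fshl returns its first operand and fshr returns its second, while fsl and fsr both return rs1. A small standalone sketch of the generic fshl/fshr semantics this relies on (plain C++ at a fixed 32-bit width for illustration, not LLVM code):

#include <cassert>
#include <cstdint>

static std::uint32_t fshl32(std::uint32_t A, std::uint32_t B, unsigned S) {
  S &= 31;                                   // both shifts read shamt mod 32
  return S ? (A << S) | (B >> (32 - S)) : A; // zero shift returns the first operand
}
static std::uint32_t fshr32(std::uint32_t A, std::uint32_t B, unsigned S) {
  S &= 31;
  return S ? (A << (32 - S)) | (B >> S) : B; // zero shift returns the second operand
}

int main() {
  assert(fshl32(0x12345678, 0x9ABCDEF0, 0) == 0x12345678);
  assert(fshr32(0x12345678, 0x9ABCDEF0, 0) == 0x9ABCDEF0);
  assert(fshl32(0x12345678, 0x9ABCDEF0, 8) == 0x3456789A);
  assert(fshr32(0x12345678, 0x9ABCDEF0, 8) == 0x789ABCDE);
  return 0;
}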
@@ -2774,7 +3062,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// We define our scalable vector types for lmul=1 to use a 64 bit known
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
- assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
+ static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
if (isa<ConstantSDNode>(Op.getOperand(0))) {
// We assume VLENB is a multiple of 8. We manually choose the best shift
// here because SimplifyDemandedBits isn't always able to simplify it.
@@ -3001,7 +3289,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
- return lowerFP_TO_INT_SAT(Op, DAG);
+ return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
case ISD::FTRUNC:
case ISD::FCEIL:
case ISD::FFLOOR:
@@ -3063,9 +3351,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
unsigned NumOpElts =
Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
SDValue Vec = DAG.getUNDEF(VT);
- for (const auto &OpIdx : enumerate(Op->ops()))
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
+ for (const auto &OpIdx : enumerate(Op->ops())) {
+ SDValue SubVec = OpIdx.value();
+ // Don't insert undef subvectors.
+ if (SubVec.isUndef())
+ continue;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
+ }
return Vec;
}
case ISD::LOAD:
@@ -3181,6 +3474,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerSET_ROUNDING(Op, DAG);
case ISD::VP_SELECT:
return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
+ case ISD::VP_MERGE:
+ return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
case ISD::VP_ADD:
return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
case ISD::VP_SUB:
@@ -4044,10 +4339,10 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
- if (!II || !II->SplatOperand)
+ if (!II || !II->hasSplatOperand())
return SDValue();
- unsigned SplatOp = II->SplatOperand + HasChain;
+ unsigned SplatOp = II->SplatOperand + 1 + HasChain;
assert(SplatOp < Op.getNumOperands());
SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
@@ -4077,7 +4372,7 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
// that a widening operation never uses SEW=64.
// NOTE: If this fails the below assert, we can probably just find the
// element count from any operand or result and use it to construct the VT.
- assert(II->SplatOperand > 1 && "Unexpected splat operand!");
+ assert(II->SplatOperand > 0 && "Unexpected splat operand!");
MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
// The more complex case is when the scalar is larger than XLenVT.
@@ -4096,8 +4391,7 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
// We need to convert the scalar to a splat vector.
// FIXME: Can we implicitly truncate the scalar if it is known to
// be sign extended?
- // VL should be the last operand.
- SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+ SDValue VL = getVLOperand(Op);
assert(VL.getValueType() == XLenVT);
ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
@@ -4138,6 +4432,15 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
: RISCVISD::BDECOMPRESS;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::riscv_bfp:
+ return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1),
+ Op.getOperand(2));
+ case Intrinsic::riscv_fsl:
+ return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::riscv_fsr:
+ return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
case Intrinsic::riscv_vmv_x_s:
assert(Op.getValueType() == XLenVT && "Unexpected VT!");
return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
@@ -4176,7 +4479,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// vmerge.vvm vDest, vSrc, vVal, mMask
MVT VT = Op.getSimpleValueType();
SDValue Vec = Op.getOperand(1);
- SDValue VL = Op.getOperand(3);
+ SDValue VL = getVLOperand(Op);
SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
@@ -4222,7 +4525,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(1, DL, XLenVT));
// Double the VL since we halved SEW.
- SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
+ SDValue VL = getVLOperand(Op);
SDValue I32VL =
DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
@@ -4294,7 +4597,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
auto *Load = cast<MemIntrinsicSDNode>(Op);
SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
- if (!IsUnmasked)
+ if (IsUnmasked)
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ else
Ops.push_back(PassThru);
Ops.push_back(Op.getOperand(3)); // Ptr
Ops.push_back(Op.getOperand(4)); // Stride
@@ -4720,7 +5025,7 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
// register size. Therefore we must slide the vector group up the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
- if (OrigIdx == 0 && Vec.isUndef())
+ if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
return Op;
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
@@ -4730,6 +5035,10 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), SubVec,
DAG.getConstant(0, DL, XLenVT));
+ if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
+ SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+ return DAG.getBitcast(Op.getValueType(), SubVec);
+ }
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. Note
@@ -5148,7 +5457,9 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
- if (!IsUnmasked)
+ if (IsUnmasked)
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ else
Ops.push_back(PassThru);
Ops.push_back(BasePtr);
if (!IsUnmasked)
@@ -5518,13 +5829,20 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
}
}
+ if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
+ IndexVT = IndexVT.changeVectorElementType(XLenVT);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
+ }
+
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
- if (!IsUnmasked)
+ if (IsUnmasked)
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ else
Ops.push_back(PassThru);
Ops.push_back(BasePtr);
Ops.push_back(Index);
@@ -5619,6 +5937,11 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
}
}
+ if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
+ IndexVT = IndexVT.changeVectorElementType(XLenVT);
+ Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
+ }
+
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
@@ -5697,6 +6020,39 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
RMValue);
}
+static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
+ switch (IntNo) {
+ default:
+ llvm_unreachable("Unexpected Intrinsic");
+ case Intrinsic::riscv_grev:
+ return RISCVISD::GREVW;
+ case Intrinsic::riscv_gorc:
+ return RISCVISD::GORCW;
+ case Intrinsic::riscv_bcompress:
+ return RISCVISD::BCOMPRESSW;
+ case Intrinsic::riscv_bdecompress:
+ return RISCVISD::BDECOMPRESSW;
+ case Intrinsic::riscv_bfp:
+ return RISCVISD::BFPW;
+ case Intrinsic::riscv_fsl:
+ return RISCVISD::FSLW;
+ case Intrinsic::riscv_fsr:
+ return RISCVISD::FSRW;
+ }
+}
+
+// Converts the given intrinsic to an i64 operation with any extension.
+static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
+ unsigned IntNo) {
+ SDLoc DL(N);
+ RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
+ SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2);
+ // ReplaceNodeResults requires we maintain the same type for the return value.
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
+}
+
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
@@ -5776,17 +6132,20 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
if (!isTypeLegal(Op0.getValueType()))
return;
if (IsStrict) {
- unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RTZ_RV64
- : RISCVISD::STRICT_FCVT_WU_RTZ_RV64;
+ unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
+ : RISCVISD::STRICT_FCVT_WU_RV64;
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
- SDValue Res = DAG.getNode(Opc, DL, VTs, N->getOperand(0), Op0);
+ SDValue Res = DAG.getNode(
+ Opc, DL, VTs, N->getOperand(0), Op0,
+ DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
Results.push_back(Res.getValue(1));
return;
}
- unsigned Opc =
- IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
- SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0);
+ unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+ SDValue Res =
+ DAG.getNode(Opc, DL, MVT::i64, Op0,
+ DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
@@ -6078,15 +6437,23 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewOp2 =
+ SDValue NewShAmt =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
// FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
- // Mask the shift amount to 5 bits.
- NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
- DAG.getConstant(0x1f, DL, MVT::i64));
- unsigned Opc =
- N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
- SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
+ // Mask the shift amount to 5 bits to prevent accidentally setting bit 5.
+ NewShAmt = DAG.getNode(ISD::AND, DL, MVT::i64, NewShAmt,
+ DAG.getConstant(0x1f, DL, MVT::i64));
+ // fshl and fshr concatenate their operands in the same order, but the fslw
+ // and fsrw instructions use different orders. fshl returns its first operand
+ // for a shift of zero, while fshr returns its second. fslw and fsrw both
+ // return rs1, so the ISD nodes need different operand orders.
+ // Shift amount is in rs2.
+ unsigned Opc = RISCVISD::FSLW;
+ if (N->getOpcode() == ISD::FSHR) {
+ std::swap(NewOp0, NewOp1);
+ Opc = RISCVISD::FSRW;
+ }
+ SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewShAmt);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
break;
}
@@ -6154,6 +6521,31 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
default:
llvm_unreachable(
"Don't know how to custom type legalize this intrinsic!");
+ case Intrinsic::riscv_grev:
+ case Intrinsic::riscv_gorc:
+ case Intrinsic::riscv_bcompress:
+ case Intrinsic::riscv_bdecompress:
+ case Intrinsic::riscv_bfp: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
+ break;
+ }
+ case Intrinsic::riscv_fsl:
+ case Intrinsic::riscv_fsr: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp2 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ SDValue NewOp3 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3));
+ unsigned Opc = getRISCVWOpcodeByIntr(IntNo);
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ break;
+ }
case Intrinsic::riscv_orc_b: {
// Lower to the GORCI encoding for orc.b with the operand extended.
SDValue NewOp =
@@ -6166,20 +6558,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
- case Intrinsic::riscv_grev:
- case Intrinsic::riscv_gorc: {
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
- "Unexpected custom legalisation");
- SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewOp2 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
- unsigned Opc =
- IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
- SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
- break;
- }
case Intrinsic::riscv_shfl:
case Intrinsic::riscv_unshfl: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
@@ -6200,21 +6578,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
break;
}
- case Intrinsic::riscv_bcompress:
- case Intrinsic::riscv_bdecompress: {
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
- "Unexpected custom legalisation");
- SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewOp2 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
- unsigned Opc = IntNo == Intrinsic::riscv_bcompress
- ? RISCVISD::BCOMPRESSW
- : RISCVISD::BDECOMPRESSW;
- SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
- break;
- }
case Intrinsic::riscv_vmv_x_s: {
EVT VT = N->getValueType(0);
MVT XLenVT = Subtarget.getXLenVT();
@@ -6923,9 +7286,14 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
// Try to form VWMUL or VWMULU.
// FIXME: Support VWMULSU.
-static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
- SelectionDAG &DAG) {
+static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
+ bool Commute) {
assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Commute)
+ std::swap(Op0, Op1);
+
bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
@@ -7002,6 +7370,123 @@ static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
}
+static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
+ switch (Op.getOpcode()) {
+ case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
+ case ISD::FTRUNC: return RISCVFPRndMode::RTZ;
+ case ISD::FFLOOR: return RISCVFPRndMode::RDN;
+ case ISD::FCEIL: return RISCVFPRndMode::RUP;
+ case ISD::FROUND: return RISCVFPRndMode::RMM;
+ }
+
+ return RISCVFPRndMode::Invalid;
+}
+
+// Fold
+// (fp_to_int (froundeven X)) -> fcvt X, rne
+// (fp_to_int (ftrunc X)) -> fcvt X, rtz
+// (fp_to_int (ffloor X)) -> fcvt X, rdn
+// (fp_to_int (fceil X)) -> fcvt X, rup
+// (fp_to_int (fround X)) -> fcvt X, rmm
+static SDValue performFP_TO_INTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // Only handle XLen or i32 types. Other types narrower than XLen will
+ // eventually be legalized to XLenVT.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != XLenVT)
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+
+ // Ensure the FP type is also legal.
+ if (!TLI.isTypeLegal(Src.getValueType()))
+ return SDValue();
+
+ // Don't do this for f16 with Zfhmin and not Zfh.
+ if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+ return SDValue();
+
+ RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+ if (FRM == RISCVFPRndMode::Invalid)
+ return SDValue();
+
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ unsigned Opc;
+ if (VT == XLenVT)
+ Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
+ else
+ Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+
+ SDLoc DL(N);
+ SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
+ DAG.getTargetConstant(FRM, DL, XLenVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
+}
+
+// Fold
+// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
+// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
+// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
+// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
+// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
+static SDValue performFP_TO_INT_SATCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // Only handle XLen types. Other types narrower than XLen will eventually be
+ // legalized to XLenVT.
+ EVT DstVT = N->getValueType(0);
+ if (DstVT != XLenVT)
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+
+ // Ensure the FP type is also legal.
+ if (!TLI.isTypeLegal(Src.getValueType()))
+ return SDValue();
+
+ // Don't do this for f16 with Zfhmin and not Zfh.
+ if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+ return SDValue();
+
+ EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+ if (FRM == RISCVFPRndMode::Invalid)
+ return SDValue();
+
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+
+ unsigned Opc;
+ if (SatVT == DstVT)
+ Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
+ else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+ Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+ else
+ return SDValue();
+ // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+ Src = Src.getOperand(0);
+
+ SDLoc DL(N);
+ SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
+ DAG.getTargetConstant(FRM, DL, XLenVT));
+
+ // RISCV FP-to-int conversions saturate to the destination register size, but
+ // don't produce 0 for nan.
+ SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+ return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
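The two combines above let a rounding function followed by an integer conversion select to a single fcvt with a static rounding mode (floor -> RDN, ceil -> RUP, trunc -> RTZ, round -> RMM, roundeven -> RNE). A hedged example of the kind of source pattern they target (ordinary C++, not part of this patch; assumes the calls are recognized as the corresponding FFLOOR/FCEIL/FTRUNC/FROUND nodes):

#include <cmath>

long floor_to_long(double X) { return (long)std::floor(X); } // (fp_to_sint (ffloor X))
long ceil_to_long(double X)  { return (long)std::ceil(X); }  // (fp_to_sint (fceil X))
long trunc_to_long(double X) { return (long)std::trunc(X); } // (fp_to_sint (ftrunc X))
long round_to_long(double X) { return (long)std::round(X); } // (fp_to_sint (fround X))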
@@ -7083,25 +7568,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue(N, 0);
break;
}
- case RISCVISD::FSL:
- case RISCVISD::FSR: {
- // Only the lower log2(Bitwidth)+1 bits of the the shift amount are read.
- unsigned BitWidth = N->getOperand(2).getValueSizeInBits();
- assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
- if (SimplifyDemandedLowBitsHelper(2, Log2_32(BitWidth) + 1))
- return SDValue(N, 0);
- break;
- }
- case RISCVISD::FSLW:
- case RISCVISD::FSRW: {
- // Only the lower 32 bits of Values and lower 6 bits of shift amount are
- // read.
- if (SimplifyDemandedLowBitsHelper(0, 32) ||
- SimplifyDemandedLowBitsHelper(1, 32) ||
- SimplifyDemandedLowBitsHelper(2, 6))
- return SDValue(N, 0);
- break;
- }
case RISCVISD::GREV:
case RISCVISD::GORC: {
// Only the lower log2(Bitwidth) bits of the shift amount are read.
@@ -7331,6 +7797,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return performFP_TO_INTCombine(N, DCI, Subtarget);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
case ISD::FCOPYSIGN: {
EVT VT = N->getValueType(0);
if (!VT.isVector())
@@ -7464,15 +7936,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
- case RISCVISD::MUL_VL: {
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG))
+ case RISCVISD::MUL_VL:
+ if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
return V;
- if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG))
- return V;
- return SDValue();
- }
+ // Mul is commutative.
+ return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(N);
SDValue Val = Store->getValue();
@@ -7486,12 +7954,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (VecVT.getVectorElementType() == MemVT) {
SDLoc DL(N);
MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
- return DAG.getStoreVP(Store->getChain(), DL, Src, Store->getBasePtr(),
- DAG.getConstant(1, DL, MaskVT),
- DAG.getConstant(1, DL, Subtarget.getXLenVT()),
- Store->getPointerInfo(),
- Store->getOriginalAlign(),
- Store->getMemOperand()->getFlags());
+ return DAG.getStoreVP(
+ Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
+ DAG.getConstant(1, DL, MaskVT),
+ DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
+ Store->getMemOperand(), Store->getAddressingMode(),
+ Store->isTruncatingStore(), /*IsCompress*/ false);
}
}
@@ -7732,14 +8200,18 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// We assume VLENB is no more than 65536 / 8 bytes.
Known.Zero.setBitsFrom(14);
break;
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = Op.getConstantOperandVal(1);
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo =
+ Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
switch (IntNo) {
default:
// We can't do anything for most intrinsics.
break;
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax:
+ case Intrinsic::riscv_vsetvli_opt:
+ case Intrinsic::riscv_vsetvlimax_opt:
// Assume that VL output is positive and would fit in an int32_t.
// TODO: VLEN might be capped at 16 bits in a future V spec update.
if (BitWidth >= 32)
@@ -7779,10 +8251,11 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::UNSHFLW:
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW:
- case RISCVISD::FCVT_W_RTZ_RV64:
- case RISCVISD::FCVT_WU_RTZ_RV64:
- case RISCVISD::STRICT_FCVT_W_RTZ_RV64:
- case RISCVISD::STRICT_FCVT_WU_RTZ_RV64:
+ case RISCVISD::BFPW:
+ case RISCVISD::FCVT_W_RV64:
+ case RISCVISD::FCVT_WU_RV64:
+ case RISCVISD::STRICT_FCVT_W_RV64:
+ case RISCVISD::STRICT_FCVT_WU_RV64:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
@@ -7958,6 +8431,42 @@ static bool isSelectPseudo(MachineInstr &MI) {
}
}
+static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned RelOpcode, unsigned EqOpcode,
+ const RISCVSubtarget &Subtarget) {
+ DebugLoc DL = MI.getDebugLoc();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src1Reg = MI.getOperand(1).getReg();
+ Register Src2Reg = MI.getOperand(2).getReg();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+
+ // Save the current FFLAGS.
+ BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
+
+ auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
+ .addReg(Src1Reg)
+ .addReg(Src2Reg);
+ if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
+ MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ // Restore the FFLAGS.
+ BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
+ .addReg(SavedFFlags, RegState::Kill);
+
+ // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
+ auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
+ .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
+ .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
+ if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
+ MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ // Erase the pseudoinstruction.
+ MI.eraseFromParent();
+ return BB;
+}
+
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
MachineBasicBlock *BB,
const RISCVSubtarget &Subtarget) {
@@ -8099,6 +8608,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitBuildPairF64Pseudo(MI, BB);
case RISCV::SplitF64Pseudo:
return emitSplitF64Pseudo(MI, BB);
+ case RISCV::PseudoQuietFLE_H:
+ return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
+ case RISCV::PseudoQuietFLT_H:
+ return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
+ case RISCV::PseudoQuietFLE_S:
+ return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
+ case RISCV::PseudoQuietFLT_S:
+ return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
+ case RISCV::PseudoQuietFLE_D:
+ return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
+ case RISCV::PseudoQuietFLT_D:
+ return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
}
}
@@ -8393,7 +8914,8 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
LocVT = XLenVT;
LocInfo = CCValAssign::Indirect;
} else if (ValVT.isScalableVector()) {
- report_fatal_error("Unable to pass scalable vector types on the stack");
+ LocVT = XLenVT;
+ LocInfo = CCValAssign::Indirect;
} else {
// Pass fixed-length vectors on the stack.
LocVT = ValVT;
@@ -8592,8 +9114,14 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
EVT LocVT = VA.getLocVT();
EVT ValVT = VA.getValVT();
EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
+ if (ValVT.isScalableVector()) {
+ // When the value is a scalable vector, we store a pointer to the scalable
+ // vector value on the stack, so ValVT here is the pointer type rather than
+ // the scalable vector type.
+ ValVT = LocVT;
+ }
int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
- /*Immutable=*/true);
+ /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val;
@@ -8623,7 +9151,8 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
if (VA.isMemLoc()) {
// f64 is passed on the stack.
- int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
+ int FI =
+ MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
return DAG.getLoad(MVT::f64, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
@@ -8637,7 +9166,7 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
SDValue Hi;
if (VA.getLocReg() == RISCV::X17) {
// Second half of f64 is passed on the stack.
- int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
+ int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
@@ -9510,12 +10039,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FMV_X_ANYEXTH)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
- NODE_NAME_CASE(FCVT_X_RTZ)
- NODE_NAME_CASE(FCVT_XU_RTZ)
- NODE_NAME_CASE(FCVT_W_RTZ_RV64)
- NODE_NAME_CASE(FCVT_WU_RTZ_RV64)
- NODE_NAME_CASE(STRICT_FCVT_W_RTZ_RV64)
- NODE_NAME_CASE(STRICT_FCVT_WU_RTZ_RV64)
+ NODE_NAME_CASE(FCVT_X)
+ NODE_NAME_CASE(FCVT_XU)
+ NODE_NAME_CASE(FCVT_W_RV64)
+ NODE_NAME_CASE(FCVT_WU_RV64)
+ NODE_NAME_CASE(STRICT_FCVT_W_RV64)
+ NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)
NODE_NAME_CASE(GREV)
NODE_NAME_CASE(GREVW)
@@ -9525,6 +10054,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SHFLW)
NODE_NAME_CASE(UNSHFL)
NODE_NAME_CASE(UNSHFLW)
+ NODE_NAME_CASE(BFP)
+ NODE_NAME_CASE(BFPW)
NODE_NAME_CASE(BCOMPRESS)
NODE_NAME_CASE(BCOMPRESSW)
NODE_NAME_CASE(BDECOMPRESS)
@@ -9598,8 +10129,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FP_ROUND_VL)
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
+ NODE_NAME_CASE(VWADDU_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
+ NODE_NAME_CASE(VP_MERGE_VL)
NODE_NAME_CASE(VMAND_VL)
NODE_NAME_CASE(VMOR_VL)
NODE_NAME_CASE(VMXOR_VL)
@@ -9768,12 +10301,18 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
.Default(RISCV::NoRegister);
if (FReg != RISCV::NoRegister) {
assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
- if (Subtarget.hasStdExtD()) {
+ if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
unsigned RegNo = FReg - RISCV::F0_F;
unsigned DReg = RISCV::F0_D + RegNo;
return std::make_pair(DReg, &RISCV::FPR64RegClass);
}
- return std::make_pair(FReg, &RISCV::FPR32RegClass);
+ if (VT == MVT::f32 || VT == MVT::Other)
+ return std::make_pair(FReg, &RISCV::FPR32RegClass);
+ if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
+ unsigned RegNo = FReg - RISCV::F0_F;
+ unsigned HReg = RISCV::F0_H + RegNo;
+ return std::make_pair(HReg, &RISCV::FPR16RegClass);
+ }
}
}
@@ -10070,6 +10609,24 @@ bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
}
}
+unsigned RISCVTargetLowering::getJumpTableEncoding() const {
+ // If we are using the small code model, we can reduce the size of each jump
+ // table entry to 4 bytes.
+ if (Subtarget.is64Bit() && !isPositionIndependent() &&
+ getTargetMachine().getCodeModel() == CodeModel::Small) {
+ return MachineJumpTableInfo::EK_Custom32;
+ }
+ return TargetLowering::getJumpTableEncoding();
+}
+
+const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
+ const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
+ unsigned uid, MCContext &Ctx) const {
+ assert(Subtarget.is64Bit() && !isPositionIndependent() &&
+ getTargetMachine().getCodeModel() == CodeModel::Small);
+ return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
+}
+
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();
@@ -10293,6 +10850,60 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
return SDValue();
}
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SDIV as SDIV
+
+ assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
+ "Unexpected divisor!");
+
+ // Conditional move is needed, so do the transformation iff Zbt is enabled.
+ if (!Subtarget.hasStdExtZbt())
+ return SDValue();
+
+ // When |Divisor| >= 2^12 the transformation isn't profitable, and dividing
+ // by 2 would add instructions to the critical path, so we keep the original
+ // DAG in both of these cases.
+ unsigned Lg2 = Divisor.countTrailingZeros();
+ if (Lg2 == 1 || Lg2 >= 12)
+ return SDValue();
+
+ // fold (sdiv X, pow2)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+ // Add (N0 < 0) ? Pow2 - 1 : 0;
+ SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(Sel.getNode());
+
+ // Divide by pow2.
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (Divisor.isNonNegative())
+ return SRA;
+
+ Created.push_back(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+}
+
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 48c5ce730933..58b7ec89f875 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -63,11 +63,11 @@ enum NodeType : unsigned {
CLZW,
CTZW,
// RV64IB/RV32IB funnel shifts, with the semantics of the named RISC-V
- // instructions, but the same operand order as fshl/fshr intrinsics.
+ // instructions. Operand order is rs1, rs3, rs2/shamt.
FSR,
FSL,
- // RV64IB funnel shifts, with the semantics of the named RISC-V instructions,
- // but the same operand order as fshl/fshr intrinsics.
+ // RV64IB funnel shifts, with the semantics of the named RISC-V instructions.
+ // Operand order is rs1, rs3, rs2/shamt.
FSRW,
FSLW,
// FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
@@ -86,14 +86,16 @@ enum NodeType : unsigned {
FMV_X_ANYEXTW_RV64,
// FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
// fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
- // range inputs. These are used for FP_TO_S/UINT_SAT lowering.
- FCVT_X_RTZ,
- FCVT_XU_RTZ,
+ // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
+ // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
+ FCVT_X,
+ FCVT_XU,
// FP to 32 bit int conversions for RV64. These are used to keep track of the
// result being sign extended to 64 bit. These saturate out of range inputs.
- // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering.
- FCVT_W_RTZ_RV64,
- FCVT_WU_RTZ_RV64,
+ // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
+ // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
+ FCVT_W_RV64,
+ FCVT_WU_RV64,
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
// (returns (Lo, Hi)). It takes a chain operand.
READ_CYCLE_WIDE,
@@ -118,6 +120,13 @@ enum NodeType : unsigned {
BCOMPRESSW,
BDECOMPRESS,
BDECOMPRESSW,
+ // The bit field place (bfp) instruction places up to XLEN/2 LSB bits from rs2
+ // into the value in rs1. The upper bits of rs2 control the length of the bit
+ // field and target position. The layout of rs2 is chosen in a way that makes
+ // it possible to construct rs2 easily using pack[h] instructions and/or
+ // andi/lui.
+ BFP,
+ BFPW,
// Vector Extension
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
// for the VL value to be used for the operation.
@@ -236,6 +245,7 @@ enum NodeType : unsigned {
// Widening instructions
VWMUL_VL,
VWMULU_VL,
+ VWADDU_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
// operand is VL.
@@ -243,6 +253,10 @@ enum NodeType : unsigned {
// Vector select with an additional VL operand. This operation is unmasked.
VSELECT_VL,
+ // Vector select with operand #2 (the value when the condition is false) tied
+ // to the destination and an additional VL operand. This operation is
+ // unmasked.
+ VP_MERGE_VL,
// Mask binary operators.
VMAND_VL,
@@ -284,8 +298,8 @@ enum NodeType : unsigned {
// FP to 32 bit int conversions for RV64. These are used to keep track of the
// result being sign extended to 64 bit. These saturate out of range inputs.
- STRICT_FCVT_W_RTZ_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
- STRICT_FCVT_WU_RTZ_RV64,
+ STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
+ STRICT_FCVT_WU_RV64,
// Memory opcodes start here.
VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -462,6 +476,8 @@ public:
SelectionDAG &DAG) const override;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ template <class NodeTy>
+ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override {
@@ -524,6 +540,16 @@ public:
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+ SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const override;
+
+ unsigned getJumpTableEncoding() const override;
+
+ const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid,
+ MCContext &Ctx) const override;
+
private:
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling
@@ -544,9 +570,6 @@ private:
bool IsRet, CallLoweringInfo *CLI,
RISCVCCAssignFn Fn) const;
- template <class NodeTy>
- SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
-
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
bool UseGOT) const;
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
@@ -652,6 +675,15 @@ namespace RISCVVIntrinsicsTable {
struct RISCVVIntrinsicInfo {
unsigned IntrinsicID;
uint8_t SplatOperand;
+ uint8_t VLOperand;
+ bool hasSplatOperand() const {
+ // 0xF is not valid. See NoSplatOperand in IntrinsicsRISCV.td.
+ return SplatOperand != 0xF;
+ }
+ bool hasVLOperand() const {
+ // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
+ return VLOperand != 0x1F;
+ }
};
using namespace RISCV;
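
The two new accessors gate use of the SplatOperand and VLOperand fields on their sentinel encodings. A minimal, self-contained sketch of that contract follows; it mirrors the struct above rather than using the generated table, and the example values are invented for illustration:

// Standalone model of RISCVVIntrinsicInfo's sentinel-based accessors.
#include <cstdint>

struct IntrinsicInfoModel {
  unsigned IntrinsicID;
  uint8_t SplatOperand; // 0xF  == NoSplatOperand
  uint8_t VLOperand;    // 0x1F == NoVLOperand
  bool hasSplatOperand() const { return SplatOperand != 0xF; }
  bool hasVLOperand() const { return VLOperand != 0x1F; }
};

int main() {
  // An entry with no splat operand and a VL operand at index 3.
  IntrinsicInfoModel Info{/*IntrinsicID=*/0, /*SplatOperand=*/0xF,
                          /*VLOperand=*/3};
  return (Info.hasSplatOperand() || !Info.hasVLOperand()) ? 1 : 0; // 0
}
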
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index dbfc90f36f80..d39e0805a79c 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -59,12 +59,13 @@ class VSETVLIInfo {
uint8_t MaskAgnostic : 1;
uint8_t MaskRegOp : 1;
uint8_t StoreOp : 1;
+ uint8_t ScalarMovOp : 1;
uint8_t SEWLMULRatioOnly : 1;
public:
VSETVLIInfo()
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
- StoreOp(false), SEWLMULRatioOnly(false) {}
+ StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
@@ -96,6 +97,18 @@ public:
assert(hasAVLImm());
return AVLImm;
}
+ bool hasZeroAVL() const {
+ if (hasAVLImm())
+ return getAVLImm() == 0;
+ return false;
+ }
+ bool hasNonZeroAVL() const {
+ if (hasAVLImm())
+ return getAVLImm() > 0;
+ if (hasAVLReg())
+ return getAVLReg() == RISCV::X0;
+ return false;
+ }
bool hasSameAVL(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
@@ -120,7 +133,7 @@ public:
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
}
void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
- bool IsStore) {
+ bool IsStore, bool IsScalarMovOp) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
@@ -129,6 +142,7 @@ public:
MaskAgnostic = MA;
MaskRegOp = MRO;
StoreOp = IsStore;
+ ScalarMovOp = IsScalarMovOp;
}
unsigned encodeVTYPE() const {
@@ -139,6 +153,16 @@ public:
bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
+ bool hasSameSEW(const VSETVLIInfo &Other) const {
+ assert(isValid() && Other.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ assert(!isUnknown() && !Other.isUnknown() &&
+ "Can't compare VTYPE in unknown state");
+ assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
+ "Can't compare when only LMUL/SEW ratio is valid.");
+ return SEW == Other.SEW;
+ }
+
bool hasSameVTYPE(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
"Can't compare invalid VSETVLIInfos");
@@ -178,6 +202,15 @@ public:
return getSEWLMULRatio() == Other.getSEWLMULRatio();
}
+ bool hasSamePolicy(const VSETVLIInfo &Other) const {
+ assert(isValid() && Other.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ assert(!isUnknown() && !Other.isUnknown() &&
+ "Can't compare VTYPE in unknown state");
+ return TailAgnostic == Other.TailAgnostic &&
+ MaskAgnostic == Other.MaskAgnostic;
+ }
+
bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
// Simple case, see if full VTYPE matches.
if (hasSameVTYPE(InstrInfo))
@@ -222,6 +255,15 @@ public:
return true;
}
+  // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and VL > 0.
+  // So the two states are compatible as long as we can be sure both VLs fall
+  // into the same case.
+ if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() &&
+ ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) ||
+ (hasZeroAVL() && InstrInfo.hasZeroAVL())) &&
+ hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo))
+ return true;
+
// The AVL must match.
if (!hasSameAVL(InstrInfo))
return false;
@@ -414,6 +456,36 @@ static MachineInstr *elideCopies(MachineInstr *MI,
}
}
+static bool isScalarMoveInstr(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case RISCV::PseudoVMV_S_X_M1:
+ case RISCV::PseudoVMV_S_X_M2:
+ case RISCV::PseudoVMV_S_X_M4:
+ case RISCV::PseudoVMV_S_X_M8:
+ case RISCV::PseudoVMV_S_X_MF2:
+ case RISCV::PseudoVMV_S_X_MF4:
+ case RISCV::PseudoVMV_S_X_MF8:
+ case RISCV::PseudoVFMV_S_F16_M1:
+ case RISCV::PseudoVFMV_S_F16_M2:
+ case RISCV::PseudoVFMV_S_F16_M4:
+ case RISCV::PseudoVFMV_S_F16_M8:
+ case RISCV::PseudoVFMV_S_F16_MF2:
+ case RISCV::PseudoVFMV_S_F16_MF4:
+ case RISCV::PseudoVFMV_S_F32_M1:
+ case RISCV::PseudoVFMV_S_F32_M2:
+ case RISCV::PseudoVFMV_S_F32_M4:
+ case RISCV::PseudoVFMV_S_F32_M8:
+ case RISCV::PseudoVFMV_S_F32_MF2:
+ case RISCV::PseudoVFMV_S_F64_M1:
+ case RISCV::PseudoVFMV_S_F64_M2:
+ case RISCV::PseudoVFMV_S_F64_M4:
+ case RISCV::PseudoVFMV_S_F64_M8:
+ return true;
+ }
+}
+
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
@@ -461,6 +533,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
// If there are no explicit defs, this is a store instruction which can
// ignore the tail and mask policies.
bool StoreOp = MI.getNumExplicitDefs() == 0;
+ bool ScalarMovOp = isScalarMoveInstr(MI);
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
@@ -477,7 +550,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
} else
InstrInfo.setAVLReg(RISCV::NoRegister);
InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
- /*MaskAgnostic*/ false, MaskRegOp, StoreOp);
+ /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp);
return InstrInfo;
}
@@ -1000,6 +1073,13 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
NeedInsertVSETVLI = false;
}
+ if (isScalarMoveInstr(MI) &&
+ ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
+ (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
+ NewInfo.hasSameVLMAX(CurInfo)) {
+ PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
+ NeedInsertVSETVLI = false;
+ }
}
if (NeedInsertVSETVLI)
insertVSETVLI(MBB, MI, NewInfo, CurInfo);
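
The scalar-move special case added above boils down to: vmv.s.x and vfmv.s.f behave the same for any non-zero VL, so a preceding vsetvli can be reused (with only its VTYPE immediate rewritten) whenever both states agree on VL being zero or non-zero, SEW, and policy. Below is a simplified, self-contained model of that rule, with AVL reduced to a known immediate; the pass additionally treats AVL = x0 as non-zero and applies further restrictions in strict mode:

// Simplified model of the compatibility rule for vmv.s.x / vfmv.s.f.
struct ScalarMoveState {
  unsigned AVL;       // application vector length (known immediate here)
  unsigned SEW;       // element width in bits
  bool TailAgnostic;
  bool MaskAgnostic;
};

bool scalarMoveCompatible(const ScalarMoveState &Cur,
                          const ScalarMoveState &Req) {
  // Only the VL == 0 vs. VL > 0 distinction matters for the scalar moves.
  bool SameZeroness = (Cur.AVL == 0) == (Req.AVL == 0);
  return SameZeroness && Cur.SEW == Req.SEW &&
         Cur.TailAgnostic == Req.TailAgnostic &&
         Cur.MaskAgnostic == Req.MaskAgnostic;
}
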
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 6a16b6354f95..f99d0f56c406 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -206,6 +206,13 @@ class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string
let isCodeGenOnly = 1;
}
+class PseudoQuietFCMP<RegisterClass Ty>
+ : Pseudo<(outs GPR:$rd), (ins Ty:$rs1, Ty:$rs2), []> {
+ let hasSideEffects = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
+
// Pseudo load instructions.
class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
: Pseudo<(outs rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2e2e00886d57..7baed2793e4e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -201,8 +201,9 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
if (MBBI->modifiesRegister(RISCV::VL))
return false;
- // Go through all defined operands, including implicit defines.
- for (const MachineOperand &MO : MBBI->operands()) {
+  // Only convert whole register copies to vmv.v.v when the defining value
+  // appears in the explicit operands.
+ for (const MachineOperand &MO : MBBI->explicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
if (!FoundDef && TRI->isSubRegisterEq(MO.getReg(), SrcReg)) {
@@ -914,7 +915,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
.addMBB(&DestBB, RISCVII::MO_CALL);
RS->enterBasicBlockEnd(MBB);
- unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
+ Register Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
MI.getIterator(), false, 0);
// TODO: The case when there is no scavenged register needs special handling.
assert(Scav != RISCV::NoRegister && "No register is scavenged!");
@@ -1145,6 +1146,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
else
Ok = isUInt<5>(Imm);
break;
+ case RISCVOp::OPERAND_RVKRNUM:
+ Ok = Imm >= 0 && Imm <= 10;
+ break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
@@ -1399,19 +1403,28 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
-#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \
- CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \
- case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \
- case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \
- case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \
+#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) \
+ CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8)
+#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) \
+ CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \
+ case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE)
+
+#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) \
+ CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \
+ case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE)
+
+#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \
+ CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \
+ case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)
+
#define CASE_VFMA_SPLATS(OP) \
- CASE_VFMA_OPCODE_LMULS(OP, VF16): \
- case CASE_VFMA_OPCODE_LMULS(OP, VF32): \
- case CASE_VFMA_OPCODE_LMULS(OP, VF64)
+ CASE_VFMA_OPCODE_LMULS_MF4(OP, VF16): \
+ case CASE_VFMA_OPCODE_LMULS_MF2(OP, VF32): \
+ case CASE_VFMA_OPCODE_LMULS_M1(OP, VF64)
// clang-format on
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
@@ -1430,10 +1443,10 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMSAC):
- case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
- case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VX):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VX):
@@ -1454,10 +1467,10 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
return true;
}
- case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
- case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
// If the tail policy is undisturbed we can't commute.
@@ -1533,19 +1546,28 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
break;
-#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
+#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
+#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
+
+#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
+
+#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
+
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
- CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF16) \
- CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF32) \
- CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF64)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VF16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VF32) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VF64)
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
@@ -1566,10 +1588,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_SPLATS(FNMSUB):
- case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
- case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VX):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VX):
@@ -1592,10 +1614,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
- CASE_VFMA_CHANGE_OPCODE_LMULS(FMACC, FMADD, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(FMSAC, FMSUB, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(FNMACC, FNMADD, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSAC, FNMSUB, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
@@ -1609,10 +1631,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
- case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
- case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
+ case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
@@ -1623,10 +1645,10 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
- CASE_VFMA_CHANGE_OPCODE_LMULS(FMADD, FMACC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(FMSUB, FMSAC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(FNMADD, FNMACC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSUB, FNMSAC, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
}
@@ -1655,13 +1677,16 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
RISCV::PseudoV##OP##_##LMUL##_TIED
-#define CASE_WIDEOP_OPCODE_LMULS(OP) \
- CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
- case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
+#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
+ CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
+
+#define CASE_WIDEOP_OPCODE_LMULS(OP) \
+ CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
+ case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
// clang-format on
#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
@@ -1669,22 +1694,25 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
NewOpc = RISCV::PseudoV##OP##_##LMUL; \
break;
-#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
- CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
+#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
+#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
+
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
switch (MI.getOpcode()) {
default:
break;
- case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV):
- case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV):
+ case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
+ case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
@@ -1694,14 +1722,14 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
- CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
- CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
}
- //clang-format on
+ // clang-format on
MachineBasicBlock &MBB = *MI.getParent();
MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 71eb6f01a4f4..64cd89cda06a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -402,6 +402,21 @@ def AddiPairImmB : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+def XLenSubTrailingOnes : SDNodeXForm<imm, [{
+ uint64_t XLen = Subtarget->getXLen();
+ uint64_t TrailingOnes = N->getAPIntValue().countTrailingOnes();
+ return CurDAG->getTargetConstant(XLen - TrailingOnes, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+// Checks if this mask is a non-empty sequence of ones starting at the
+// least significant bit with the remainder zero, and does not fit in simm12.
+def TrailingOnesMask : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ return !isInt<12>(N->getSExtValue()) && isMask_64(N->getZExtValue());
+}], XLenSubTrailingOnes>;
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
@@ -1019,6 +1034,23 @@ def mul_const_oneuse : PatFrag<(ops node:$A, node:$B),
return false;
}]>;
+def sext_oneuse : PatFrag<(ops node:$A), (sext node:$A), [{
+ return N->hasOneUse();
+}]>;
+
+def zext_oneuse : PatFrag<(ops node:$A), (zext node:$A), [{
+ return N->hasOneUse();
+}]>;
+
+def anyext_oneuse : PatFrag<(ops node:$A), (anyext node:$A), [{
+ return N->hasOneUse();
+}]>;
+
+def fpext_oneuse : PatFrag<(ops node:$A),
+ (any_fpextend node:$A), [{
+ return N->hasOneUse();
+}]>;
+
/// Simple arithmetic operations
def : PatGprGpr<add, ADD>;
@@ -1034,6 +1066,10 @@ def : PatGprUimmLog2XLen<shl, SLLI>;
def : PatGprUimmLog2XLen<srl, SRLI>;
def : PatGprUimmLog2XLen<sra, SRAI>;
+// AND with trailing ones mask exceeding simm12.
+def : Pat<(XLenVT (and GPR:$rs, TrailingOnesMask:$mask)),
+ (SRLI (SLLI $rs, TrailingOnesMask:$mask), TrailingOnesMask:$mask)>;
+
// Match both a plain shift and one where the shift amount is masked (this is
// typically introduced when the legalizer promotes the shift amount and
// zero-extends it). For RISC-V, the mask is unnecessary as shifts in the base
@@ -1350,6 +1386,10 @@ def ReadFRM : ReadSysReg<SysRegFRM, [FRM]>;
def WriteFRM : WriteSysReg<SysRegFRM, [FRM]>;
def WriteFRMImm : WriteSysRegImm<SysRegFRM, [FRM]>;
+let hasSideEffects = true in {
+def ReadFFLAGS : ReadSysReg<SysRegFFLAGS, [FFLAGS]>;
+def WriteFFLAGS : WriteSysReg<SysRegFFLAGS, [FFLAGS]>;
+}
/// Other pseudo-instructions
// Pessimistically assume the stack pointer will be clobbered
@@ -1476,5 +1516,6 @@ include "RISCVInstrInfoF.td"
include "RISCVInstrInfoD.td"
include "RISCVInstrInfoC.td"
include "RISCVInstrInfoZb.td"
+include "RISCVInstrInfoZk.td"
include "RISCVInstrInfoV.td"
include "RISCVInstrInfoZfh.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index d6c31c4804db..2837b92da81f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -30,21 +30,12 @@ def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtD] in {
-
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd),
- (ins GPR:$rs1, simm12:$imm12),
- "fld", "$rd, ${imm12}(${rs1})">,
- Sched<[WriteFLD64, ReadFMemBase]>;
+def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
// encoding.
-let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-def FSD : RVInstS<0b011, OPC_STORE_FP, (outs),
- (ins FPR64:$rs2, GPR:$rs1, simm12:$imm12),
- "fsd", "$rs2, ${imm12}(${rs1})">,
- Sched<[WriteFST64, ReadStoreData, ReadFMemBase]>;
+def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>;
let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
def FMADD_D : FPFMA_rrr_frm<OPC_MADD, 0b01, "fmadd.d", FPR64>;
@@ -167,6 +158,10 @@ def : InstAlias<"fge.d $rd, $rs, $rt",
def PseudoFLD : PseudoFloatLoad<"fld", FPR64>;
def PseudoFSD : PseudoStore<"fsd", FPR64>;
+let usesCustomInserter = 1 in {
+def PseudoQuietFLE_D : PseudoQuietFCMP<FPR64>;
+def PseudoQuietFLT_D : PseudoQuietFCMP<FPR64>;
+}
} // Predicates = [HasStdExtD]
//===----------------------------------------------------------------------===//
@@ -231,13 +226,34 @@ def : PatFpr64Fpr64<fminnum, FMIN_D>;
def : PatFpr64Fpr64<fmaxnum, FMAX_D>;
/// Setcc
-
-def : PatFpr64Fpr64<seteq, FEQ_D>;
-def : PatFpr64Fpr64<setoeq, FEQ_D>;
-def : PatFpr64Fpr64<setlt, FLT_D>;
-def : PatFpr64Fpr64<setolt, FLT_D>;
-def : PatFpr64Fpr64<setle, FLE_D>;
-def : PatFpr64Fpr64<setole, FLE_D>;
+// FIXME: SETEQ/SETLT/SETLE imply nonans; can we pick better instructions for
+// the strict versions of those?
+
+// Match non-signaling FEQ_D
+def : PatSetCC<FPR64, any_fsetcc, SETEQ, FEQ_D>;
+def : PatSetCC<FPR64, any_fsetcc, SETOEQ, FEQ_D>;
+def : PatSetCC<FPR64, strict_fsetcc, SETLT, PseudoQuietFLT_D>;
+def : PatSetCC<FPR64, strict_fsetcc, SETOLT, PseudoQuietFLT_D>;
+def : PatSetCC<FPR64, strict_fsetcc, SETLE, PseudoQuietFLE_D>;
+def : PatSetCC<FPR64, strict_fsetcc, SETOLE, PseudoQuietFLE_D>;
+
+// Match signaling FEQ_D
+def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETEQ),
+ (AND (FLE_D $rs1, $rs2),
+ (FLE_D $rs2, $rs1))>;
+def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETOEQ),
+ (AND (FLE_D $rs1, $rs2),
+ (FLE_D $rs2, $rs1))>;
+// If both operands are the same, use a single FLE.
+def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETEQ),
+ (FLE_D $rs1, $rs1)>;
+def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETOEQ),
+ (FLE_D $rs1, $rs1)>;
+
+def : PatSetCC<FPR64, any_fsetccs, SETLT, FLT_D>;
+def : PatSetCC<FPR64, any_fsetccs, SETOLT, FLT_D>;
+def : PatSetCC<FPR64, any_fsetccs, SETLE, FLE_D>;
+def : PatSetCC<FPR64, any_fsetccs, SETOLE, FLE_D>;
def Select_FPR64_Using_CC_GPR : SelectCC_rrirr<FPR64, GPR>;
@@ -269,20 +285,22 @@ let Predicates = [HasStdExtD, IsRV32] in {
/// Float constants
def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>;
+def : Pat<(f64 (fpimmneg0)), (FSGNJN_D (FCVT_D_W (i32 X0)),
+ (FCVT_D_W (i32 X0)))>;
// double->[u]int. Round-to-zero must be used.
def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>;
def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>;
// Saturating double->[u]int32.
-def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>;
-def : Pat<(i32 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_WU_D $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_x FPR64:$rs1, timm:$frm)), (FCVT_W_D $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_xu FPR64:$rs1, timm:$frm)), (FCVT_WU_D $rs1, timm:$frm)>;
// float->int32 with current rounding mode.
-def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>;
+def : Pat<(i32 (any_lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>;
// float->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>;
+def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>;
// [u]int->double.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>;
@@ -293,6 +311,8 @@ let Predicates = [HasStdExtD, IsRV64] in {
/// Float constants
def : Pat<(f64 (fpimm0)), (FMV_D_X (i64 X0))>;
+def : Pat<(f64 (fpimmneg0)), (FSGNJN_D (FMV_D_X (i64 X0)),
+ (FMV_D_X (i64 X0)))>;
// Moves (no conversion)
def : Pat<(bitconvert (i64 GPR:$rs1)), (FMV_D_X GPR:$rs1)>;
@@ -301,28 +321,28 @@ def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
-def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_w_rv64 FPR64:$rs1, timm:$frm), (FCVT_W_D $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_wu_rv64 FPR64:$rs1, timm:$frm), (FCVT_WU_D $rs1, timm:$frm)>;
// [u]int32->fp
def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;
def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>;
// Saturating double->[u]int64.
-def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>;
-def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_x FPR64:$rs1, timm:$frm)), (FCVT_L_D $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_xu FPR64:$rs1, timm:$frm)), (FCVT_LU_D $rs1, timm:$frm)>;
// double->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>;
def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>;
// double->int64 with current rounding mode.
-def : Pat<(i64 (lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>;
-def : Pat<(i64 (llrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>;
+def : Pat<(i64 (any_lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>;
+def : Pat<(i64 (any_llrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>;
// double->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
-def : Pat<(i64 (llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
+def : Pat<(i64 (any_lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
+def : Pat<(i64 (any_llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
// [u]int64->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>;
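
The strict_fsetccs SETEQ patterns above synthesize a signaling equality from two FLE results: fle.d raises the invalid exception on any NaN operand (feq.d does not), and for non-NaN values a <= b together with b <= a holds exactly when a == b. A minimal sketch of that boolean identity (illustrative only; C++ relational operators happen to be signaling comparisons as well, but the exception flags are not modelled here):

// le1/le2 stand in for the two FLE_D results that the pattern ANDs together.
bool signalingEqual(double a, double b) {
  bool le1 = (a <= b); // FLE_D a, b
  bool le2 = (b <= a); // FLE_D b, a
  return le1 && le2;   // false whenever either operand is NaN
}
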
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index bb45ed859442..a8ac06ba8da3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -20,36 +20,38 @@ def SDT_RISCVFMV_W_X_RV64
def SDT_RISCVFMV_X_ANYEXTW_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
def SDT_RISCVFCVT_W_RV64
- : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>;
+ : SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisFP<1>,
+ SDTCisVT<2, i64>]>;
def SDT_RISCVFCVT_X
- : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
+ : SDTypeProfile<1, 2, [SDTCisVT<0, XLenVT>, SDTCisFP<1>,
+ SDTCisVT<2, XLenVT>]>;
def riscv_fmv_w_x_rv64
: SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>;
def riscv_fmv_x_anyextw_rv64
: SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
-def riscv_fcvt_w_rtz_rv64
- : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64>;
-def riscv_fcvt_wu_rtz_rv64
- : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64>;
-def riscv_fcvt_x_rtz
- : SDNode<"RISCVISD::FCVT_X_RTZ", SDT_RISCVFCVT_X>;
-def riscv_fcvt_xu_rtz
- : SDNode<"RISCVISD::FCVT_XU_RTZ", SDT_RISCVFCVT_X>;
-
-def riscv_strict_fcvt_w_rtz_rv64
- : SDNode<"RISCVISD::STRICT_FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64,
+def riscv_fcvt_w_rv64
+ : SDNode<"RISCVISD::FCVT_W_RV64", SDT_RISCVFCVT_W_RV64>;
+def riscv_fcvt_wu_rv64
+ : SDNode<"RISCVISD::FCVT_WU_RV64", SDT_RISCVFCVT_W_RV64>;
+def riscv_fcvt_x
+ : SDNode<"RISCVISD::FCVT_X", SDT_RISCVFCVT_X>;
+def riscv_fcvt_xu
+ : SDNode<"RISCVISD::FCVT_XU", SDT_RISCVFCVT_X>;
+
+def riscv_strict_fcvt_w_rv64
+ : SDNode<"RISCVISD::STRICT_FCVT_W_RV64", SDT_RISCVFCVT_W_RV64,
[SDNPHasChain]>;
-def riscv_strict_fcvt_wu_rtz_rv64
- : SDNode<"RISCVISD::STRICT_FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64,
+def riscv_strict_fcvt_wu_rv64
+ : SDNode<"RISCVISD::STRICT_FCVT_WU_RV64", SDT_RISCVFCVT_W_RV64,
[SDNPHasChain]>;
-def riscv_any_fcvt_w_rtz_rv64 : PatFrags<(ops node:$src),
- [(riscv_strict_fcvt_w_rtz_rv64 node:$src),
- (riscv_fcvt_w_rtz_rv64 node:$src)]>;
-def riscv_any_fcvt_wu_rtz_rv64 : PatFrags<(ops node:$src),
- [(riscv_strict_fcvt_wu_rtz_rv64 node:$src),
- (riscv_fcvt_wu_rtz_rv64 node:$src)]>;
+def riscv_any_fcvt_w_rv64 : PatFrags<(ops node:$src, node:$frm),
+ [(riscv_strict_fcvt_w_rv64 node:$src, node:$frm),
+ (riscv_fcvt_w_rv64 node:$src, node:$frm)]>;
+def riscv_any_fcvt_wu_rv64 : PatFrags<(ops node:$src, node:$frm),
+ [(riscv_strict_fcvt_wu_rv64 node:$src, node:$frm),
+ (riscv_fcvt_wu_rv64 node:$src, node:$frm)]>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -73,6 +75,22 @@ def frmarg : Operand<XLenVT> {
// Instruction class templates
//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class FPLoad_r<bits<3> funct3, string opcodestr, RegisterClass rty,
+ SchedWrite sw>
+ : RVInstI<funct3, OPC_LOAD_FP, (outs rty:$rd),
+ (ins GPR:$rs1, simm12:$imm12),
+ opcodestr, "$rd, ${imm12}(${rs1})">,
+ Sched<[sw, ReadFMemBase]>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+class FPStore_r<bits<3> funct3, string opcodestr, RegisterClass rty,
+ SchedWrite sw>
+ : RVInstS<funct3, OPC_STORE_FP, (outs),
+ (ins rty:$rs2, GPR:$rs1, simm12:$imm12),
+ opcodestr, "$rs2, ${imm12}(${rs1})">,
+ Sched<[sw, ReadStoreData, ReadFMemBase]>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
UseNamedOperandTable = 1, hasPostISelHook = 1 in
class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr,
@@ -138,20 +156,12 @@ class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtF] in {
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd),
- (ins GPR:$rs1, simm12:$imm12),
- "flw", "$rd, ${imm12}(${rs1})">,
- Sched<[WriteFLD32, ReadFMemBase]>;
+def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
// encoding.
-let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-def FSW : RVInstS<0b010, OPC_STORE_FP, (outs),
- (ins FPR32:$rs2, GPR:$rs1, simm12:$imm12),
- "fsw", "$rs2, ${imm12}(${rs1})">,
- Sched<[WriteFST32, ReadStoreData, ReadFMemBase]>;
+def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
def FMADD_S : FPFMA_rrr_frm<OPC_MADD, 0b00, "fmadd.s", FPR32>;
@@ -299,6 +309,10 @@ def : MnemonicAlias<"fmv.x.s", "fmv.x.w">;
def PseudoFLW : PseudoFloatLoad<"flw", FPR32>;
def PseudoFSW : PseudoStore<"fsw", FPR32>;
+let usesCustomInserter = 1 in {
+def PseudoQuietFLE_S : PseudoQuietFCMP<FPR32>;
+def PseudoQuietFLT_S : PseudoQuietFCMP<FPR32>;
+}
} // Predicates = [HasStdExtF]
//===----------------------------------------------------------------------===//
@@ -306,9 +320,13 @@ def PseudoFSW : PseudoStore<"fsw", FPR32>;
//===----------------------------------------------------------------------===//
/// Floating point constants
-def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
/// Generic pattern classes
+class PatSetCC<RegisterClass Ty, SDPatternOperator OpNode, CondCode Cond, RVInst Inst>
+ : Pat<(OpNode Ty:$rs1, Ty:$rs2, Cond), (Inst $rs1, $rs2)>;
+
class PatFpr32Fpr32<SDPatternOperator OpNode, RVInstR Inst>
: Pat<(OpNode FPR32:$rs1, FPR32:$rs2), (Inst $rs1, $rs2)>;
@@ -319,6 +337,7 @@ let Predicates = [HasStdExtF] in {
/// Float constants
def : Pat<(f32 (fpimm0)), (FMV_W_X X0)>;
+def : Pat<(f32 (fpimmneg0)), (FSGNJN_S (FMV_W_X X0), (FMV_W_X X0))>;
/// Float conversion operations
@@ -363,13 +382,34 @@ def : PatFpr32Fpr32<fminnum, FMIN_S>;
def : PatFpr32Fpr32<fmaxnum, FMAX_S>;
/// Setcc
-
-def : PatFpr32Fpr32<seteq, FEQ_S>;
-def : PatFpr32Fpr32<setoeq, FEQ_S>;
-def : PatFpr32Fpr32<setlt, FLT_S>;
-def : PatFpr32Fpr32<setolt, FLT_S>;
-def : PatFpr32Fpr32<setle, FLE_S>;
-def : PatFpr32Fpr32<setole, FLE_S>;
+// FIXME: SETEQ/SETLT/SETLE imply nonans; can we pick better instructions for
+// the strict versions of those?
+
+// Match non-signaling FEQ_S
+def : PatSetCC<FPR32, any_fsetcc, SETEQ, FEQ_S>;
+def : PatSetCC<FPR32, any_fsetcc, SETOEQ, FEQ_S>;
+def : PatSetCC<FPR32, strict_fsetcc, SETLT, PseudoQuietFLT_S>;
+def : PatSetCC<FPR32, strict_fsetcc, SETOLT, PseudoQuietFLT_S>;
+def : PatSetCC<FPR32, strict_fsetcc, SETLE, PseudoQuietFLE_S>;
+def : PatSetCC<FPR32, strict_fsetcc, SETOLE, PseudoQuietFLE_S>;
+
+// Match signaling FEQ_S
+def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETEQ),
+ (AND (FLE_S $rs1, $rs2),
+ (FLE_S $rs2, $rs1))>;
+def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETOEQ),
+ (AND (FLE_S $rs1, $rs2),
+ (FLE_S $rs2, $rs1))>;
+// If both operands are the same, use a single FLE.
+def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETEQ),
+ (FLE_S $rs1, $rs1)>;
+def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETOEQ),
+ (FLE_S $rs1, $rs1)>;
+
+def : PatSetCC<FPR32, any_fsetccs, SETLT, FLT_S>;
+def : PatSetCC<FPR32, any_fsetccs, SETOLT, FLT_S>;
+def : PatSetCC<FPR32, any_fsetccs, SETLE, FLE_S>;
+def : PatSetCC<FPR32, any_fsetccs, SETOLE, FLE_S>;
def Select_FPR32_Using_CC_GPR : SelectCC_rrirr<FPR32, GPR>;
@@ -393,14 +433,14 @@ def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
// Saturating float->[u]int32.
-def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(i32 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_x FPR32:$rs1, timm:$frm)), (FCVT_W_S $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_xu FPR32:$rs1, timm:$frm)), (FCVT_WU_S $rs1, timm:$frm)>;
// float->int32 with current rounding mode.
-def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>;
+def : Pat<(i32 (any_lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>;
// float->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>;
+def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>;
// [u]int->float. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>;
@@ -417,24 +457,24 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32),
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_w_rv64 FPR32:$rs1, timm:$frm), (FCVT_W_S $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_wu_rv64 FPR32:$rs1, timm:$frm), (FCVT_WU_S $rs1, timm:$frm)>;
// float->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
// Saturating float->[u]int64.
-def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
-def : Pat<(i64 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_x FPR32:$rs1, timm:$frm)), (FCVT_L_S $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_xu FPR32:$rs1, timm:$frm)), (FCVT_LU_S $rs1, timm:$frm)>;
// float->int64 with current rounding mode.
-def : Pat<(i64 (lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
-def : Pat<(i64 (llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
+def : Pat<(i64 (any_lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
+def : Pat<(i64 (any_llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
// float->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
-def : Pat<(i64 (llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
+def : Pat<(i64 (any_lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
+def : Pat<(i64 (any_llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>;
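
With the rounding mode now carried as an explicit immediate on the riscv_fcvt_* nodes, the patterns above pass the standard frm encodings straight through. For reference, those encodings are sketched below; the in-tree enum is RISCVFPRndMode, whose member names may differ slightly from this sketch:

// Architectural frm encodings used as the immediate operand in the FCVT
// patterns: RTZ for fp->int, RMM for lround/llround, DYN for lrint/llrint.
enum class FPRndMode : unsigned {
  RNE = 0b000, // round to nearest, ties to even
  RTZ = 0b001, // round towards zero
  RDN = 0b010, // round down (towards -infinity)
  RUP = 0b011, // round up (towards +infinity)
  RMM = 0b100, // round to nearest, ties to max magnitude
  DYN = 0b111  // dynamic: use the frm CSR
};
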
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 173ae43a08d6..306024a3e4fd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -19,18 +19,22 @@ include "RISCVInstrFormatsV.td"
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def VTypeIAsmOperand : AsmOperandClass {
- let Name = "VTypeI";
+class VTypeIAsmOperand<int VTypeINum> : AsmOperandClass {
+ let Name = "VTypeI" # VTypeINum;
let ParserMethod = "parseVTypeI";
let DiagnosticType = "InvalidVTypeI";
+ let RenderMethod = "addVTypeIOperands";
}
-def VTypeIOp : Operand<XLenVT> {
- let ParserMatchClass = VTypeIAsmOperand;
+class VTypeIOp<int VTypeINum> : Operand<XLenVT> {
+ let ParserMatchClass = VTypeIAsmOperand<VTypeINum>;
let PrintMethod = "printVTypeI";
- let DecoderMethod = "decodeUImmOperand<11>";
+ let DecoderMethod = "decodeUImmOperand<"#VTypeINum#">";
}
+def VTypeIOp10 : VTypeIOp<10>;
+def VTypeIOp11 : VTypeIOp<11>;
+
def VMaskAsmOperand : AsmOperandClass {
let Name = "RVVMaskRegOpOperand";
let RenderMethod = "addRegOperands";
@@ -77,6 +81,9 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
}];
}
+def simm5_plus1_nonzero : ImmLeaf<XLenVT,
+ [{return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);}]>;
+
//===----------------------------------------------------------------------===//
// Scheduling definitions.
//===----------------------------------------------------------------------===//
@@ -342,6 +349,27 @@ class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
+
+multiclass VIndexLoadStore<list<int> EEWList> {
+ foreach n = EEWList in {
+ defvar w = !cast<RISCVWidth>("LSWidth" # n);
+
+ def VLUXEI # n # _V :
+ VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">,
+ VLXSched<n, "U">;
+ def VLOXEI # n # _V :
+ VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">,
+ VLXSched<n, "O">;
+
+ def VSUXEI # n # _V :
+ VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">,
+ VSXSched<n, "U">;
+ def VSOXEI # n # _V :
+ VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">,
+ VSXSched<n, "O">;
+ }
+}
+
multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
@@ -757,7 +785,7 @@ multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
}
multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
- foreach l = [8, 16, 32, 64] in {
+ foreach l = [8, 16, 32] in {
defvar w = !cast<RISCVWidth>("LSWidth" # l);
defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R" # l);
@@ -765,23 +793,27 @@ multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
Sched<[s, ReadVLDX]>;
}
}
+multiclass VWholeLoadEEW64<bits<3> nf, string opcodestr, RegisterClass VRC, SchedReadWrite schedrw> {
+ def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>,
+ Sched<[schedrw, ReadVLDX]>;
+}
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
-def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei),
+def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp11:$vtypei),
"vsetvli", "$rd, $rs1, $vtypei">;
-def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei),
+def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp10:$vtypei),
"vsetivli", "$rd, $uimm, $vtypei">;
def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
"vsetvl", "$rd, $rs1, $rs2">;
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
-foreach eew = [8, 16, 32, 64] in {
+foreach eew = [8, 16, 32] in {
defvar w = !cast<RISCVWidth>("LSWidth" # eew);
// Vector Unit-Stride Instructions
@@ -794,18 +826,12 @@ foreach eew = [8, 16, 32, 64] in {
// Vector Strided Instructions
def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>;
def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>;
-
- // Vector Indexed Instructions
- def VLUXEI#eew#_V :
- VIndexedLoad<MOPLDIndexedUnord, w, "vluxei"#eew#".v">, VLXSched<eew, "U">;
- def VLOXEI#eew#_V :
- VIndexedLoad<MOPLDIndexedOrder, w, "vloxei"#eew#".v">, VLXSched<eew, "O">;
- def VSUXEI#eew#_V :
- VIndexedStore<MOPSTIndexedUnord, w, "vsuxei"#eew#".v">, VSXSched<eew, "U">;
- def VSOXEI#eew#_V :
- VIndexedStore<MOPSTIndexedOrder, w, "vsoxei"#eew#".v">, VSXSched<eew, "O">;
}
+defm "" : VIndexLoadStore<[8, 16, 32]>;
+} // Predicates = [HasVInstructions]
+
+let Predicates = [HasVInstructions] in {
def VLM_V : VUnitStrideLoadMask<"vlm.v">,
Sched<[WriteVLDM, ReadVLDX]>;
def VSM_V : VUnitStrideStoreMask<"vsm.v">,
@@ -820,11 +846,6 @@ defm VL2R : VWholeLoadN<1, "vl2r", VRM2>;
defm VL4R : VWholeLoadN<3, "vl4r", VRM4>;
defm VL8R : VWholeLoadN<7, "vl8r", VRM8>;
-def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
-def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>;
-def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>;
-def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
-
def VS1R_V : VWholeStore<0, "vs1r.v", VR>,
Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>,
@@ -834,6 +855,40 @@ def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>,
def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>,
Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>;
+def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
+def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>;
+def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>;
+def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
+} // Predicates = [HasVInstructions]
+
+let Predicates = [HasVInstructionsI64] in {
+// Vector Unit-Stride Instructions
+def VLE64_V : VUnitStrideLoad<LSWidth64, "vle64.v">,
+ VLESched<64>;
+
+def VLE64FF_V : VUnitStrideLoadFF<LSWidth64, "vle64ff.v">,
+ VLFSched<64>;
+
+def VSE64_V : VUnitStrideStore<LSWidth64, "vse64.v">,
+ VSESched<64>;
+// Vector Strided Instructions
+def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">,
+               VLSSched<64>;
+
+def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">,
+ VSSSched<64>;
+
+defm VL1R: VWholeLoadEEW64<0, "vl1r", VR, WriteVLD1R64>;
+defm VL2R: VWholeLoadEEW64<1, "vl2r", VRM2, WriteVLD2R64>;
+defm VL4R: VWholeLoadEEW64<3, "vl4r", VRM4, WriteVLD4R64>;
+defm VL8R: VWholeLoadEEW64<7, "vl8r", VRM8, WriteVLD8R64>;
+} // Predicates = [HasVInstructionsI64]
+let Predicates = [IsRV64, HasVInstructionsI64] in {
+ // Vector Indexed Instructions
+ defm "" : VIndexLoadStore<[64]>;
+} // Predicates = [IsRV64, HasVInstructionsI64]
+
+let Predicates = [HasVInstructions] in {
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
defm VSUB_V : VALU_IV_V_X<"vsub", 0b000010>;
@@ -1065,9 +1120,9 @@ let Constraints = "@earlyclobber $vd" in {
defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
} // Constraints = "@earlyclobber $vd"
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// Vector Single-Width Floating-Point Add/Subtract Instructions
defm VFADD_V : VALU_FV_V_F<"vfadd", 0b000000>;
defm VFSUB_V : VALU_FV_V_F<"vfsub", 0b000010>;
@@ -1202,9 +1257,9 @@ defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
defm VFNCVT_ROD_F_F_W : VNCVTF_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
} // Constraints = "@earlyclobber $vd"
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// Vector Single-Width Integer Reduction Instructions
let RVVConstraint = NoConstraint in {
@@ -1228,9 +1283,9 @@ defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>;
defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// Vector Single-Width Floating-Point Reduction Instructions
let RVVConstraint = NoConstraint in {
defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>;
@@ -1254,9 +1309,9 @@ defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>;
def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm",
(VFWREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// Vector Mask-Register Logical Instructions
let RVVConstraint = NoConstraint in {
defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">;
@@ -1337,9 +1392,9 @@ def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb),
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1,
RVVConstraint = NoConstraint in {
@@ -1354,9 +1409,9 @@ def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb),
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// Vector Slide Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>;
@@ -1364,16 +1419,16 @@ defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>;
defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// Vector Register Gather Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>;
@@ -1404,11 +1459,11 @@ foreach n = [2, 4, 8] in {
}
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtZvlsseg] in {
+let Predicates = [HasVInstructions] in {
foreach nf=2-8 in {
- foreach eew = [8, 16, 32, 64] in {
+ foreach eew = [8, 16, 32] in {
defvar w = !cast<RISCVWidth>("LSWidth"#eew);
def VLSEG#nf#E#eew#_V :
@@ -1439,6 +1494,41 @@ let Predicates = [HasStdExtZvlsseg] in {
"vsoxseg"#nf#"ei"#eew#".v">;
}
}
-} // Predicates = [HasStdExtZvlsseg]
+} // Predicates = [HasVInstructions]
+
+let Predicates = [HasVInstructionsI64] in {
+ foreach nf=2-8 in {
+ // Vector Unit-strided Segment Instructions
+ def VLSEG#nf#E64_V :
+ VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">;
+ def VLSEG#nf#E64FF_V :
+ VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">;
+ def VSSEG#nf#E64_V :
+ VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
+
+ // Vector Strided Segment Instructions
+ def VLSSEG#nf#E64_V :
+ VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
+ def VSSSEG#nf#E64_V :
+ VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
+ }
+} // Predicates = [HasVInstructionsI64]
+let Predicates = [HasVInstructionsI64, IsRV64] in {
+ foreach nf=2-8 in {
+ // Vector Indexed Segment Instructions
+ def VLUXSEG#nf#EI64_V :
+ VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64,
+ "vluxseg"#nf#"ei64.v">;
+ def VLOXSEG#nf#EI64_V :
+ VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth64,
+ "vloxseg"#nf#"ei64.v">;
+ def VSUXSEG#nf#EI64_V :
+ VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64,
+ "vsuxseg"#nf#"ei64.v">;
+ def VSOXSEG#nf#EI64_V :
+ VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64,
+ "vsoxseg"#nf#"ei64.v">;
+ }
+} // Predicates = [HasVInstructionsI64, IsRV64]
include "RISCVInstrInfoVPseudos.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 073fa605e0fb..4e7e251bc412 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -71,49 +71,45 @@ def V_MF4 : LMULInfo<0b110, 2, VR, VR, VR,/*NoVReg*/VR,/*NoVReg*/VR, "M
def V_MF2 : LMULInfo<0b111, 4, VR, VR, VR, VR,/*NoVReg*/VR, "MF2">;
// Used to iterate over all possible LMULs.
-def MxList {
- list<LMULInfo> m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
-}
+defvar MxList = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
+// For floating point which don't need MF8.
+defvar MxListF = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
+
// Used for widening and narrowing instructions as it doesn't contain M8.
-def MxListW {
- list<LMULInfo> m = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4];
-}
+defvar MxListW = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4];
+// For floating point which don't need MF8.
+defvar MxListFW = [V_MF4, V_MF2, V_M1, V_M2, V_M4];
+
// Use for zext/sext.vf2
-def MxListVF2 {
- list<LMULInfo> m = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
-}
+defvar MxListVF2 = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
+
// Use for zext/sext.vf4
-def MxListVF4 {
- list<LMULInfo> m = [V_MF2, V_M1, V_M2, V_M4, V_M8];
-}
+defvar MxListVF4 = [V_MF2, V_M1, V_M2, V_M4, V_M8];
+
// Use for zext/sext.vf8
-def MxListVF8 {
- list<LMULInfo> m = [V_M1, V_M2, V_M4, V_M8];
+defvar MxListVF8 = [V_M1, V_M2, V_M4, V_M8];
+
+class MxSet<int eew> {
+ list<LMULInfo> m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
+ !eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
+ !eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8],
+ !eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]);
}
-class FPR_Info<RegisterClass regclass, string fx> {
+class FPR_Info<RegisterClass regclass, string fx, list<LMULInfo> mxlist> {
RegisterClass fprclass = regclass;
string FX = fx;
+ list<LMULInfo> MxList = mxlist;
}
-def SCALAR_F16 : FPR_Info<FPR16, "F16">;
-def SCALAR_F32 : FPR_Info<FPR32, "F32">;
-def SCALAR_F64 : FPR_Info<FPR64, "F64">;
+def SCALAR_F16 : FPR_Info<FPR16, "F16", MxSet<16>.m>;
+def SCALAR_F32 : FPR_Info<FPR32, "F32", MxSet<32>.m>;
+def SCALAR_F64 : FPR_Info<FPR64, "F64", MxSet<64>.m>;
-def FPList {
- list<FPR_Info> fpinfo = [SCALAR_F16, SCALAR_F32, SCALAR_F64];
-}
-// Used for widening instructions. It excludes F64.
-def FPListW {
- list<FPR_Info> fpinfo = [SCALAR_F16, SCALAR_F32];
-}
+defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64];
-class MxSet<int eew> {
- list<LMULInfo> m = !cond(!eq(eew, 8) : [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
- !eq(eew, 16) : [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8],
- !eq(eew, 32) : [V_MF2, V_M1, V_M2, V_M4, V_M8],
- !eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]);
-}
+// Used for widening instructions. It excludes F64.
+defvar FPListW = [SCALAR_F16, SCALAR_F32];
class NFSet<LMULInfo m> {
list<int> L = !cond(!eq(m.value, V_M8.value): [],
@@ -236,25 +232,25 @@ defset list<VTypeInfo> AllVectors = {
defset list<GroupVTypeInfo> GroupFloatVectors = {
def VF16M2: GroupVTypeInfo<vfloat16m2_t, vfloat16m1_t, vbool8_t, 16,
- VRM2, V_M2, f16, FPR16>;
+ VRM2, V_M2, f16, FPR16>;
def VF16M4: GroupVTypeInfo<vfloat16m4_t, vfloat16m1_t, vbool4_t, 16,
- VRM4, V_M4, f16, FPR16>;
+ VRM4, V_M4, f16, FPR16>;
def VF16M8: GroupVTypeInfo<vfloat16m8_t, vfloat16m1_t, vbool2_t, 16,
- VRM8, V_M8, f16, FPR16>;
+ VRM8, V_M8, f16, FPR16>;
def VF32M2: GroupVTypeInfo<vfloat32m2_t, vfloat32m1_t, vbool16_t, 32,
- VRM2, V_M2, f32, FPR32>;
+ VRM2, V_M2, f32, FPR32>;
def VF32M4: GroupVTypeInfo<vfloat32m4_t, vfloat32m1_t, vbool8_t, 32,
- VRM4, V_M4, f32, FPR32>;
+ VRM4, V_M4, f32, FPR32>;
def VF32M8: GroupVTypeInfo<vfloat32m8_t, vfloat32m1_t, vbool4_t, 32,
- VRM8, V_M8, f32, FPR32>;
+ VRM8, V_M8, f32, FPR32>;
def VF64M2: GroupVTypeInfo<vfloat64m2_t, vfloat64m1_t, vbool32_t, 64,
- VRM2, V_M2, f64, FPR64>;
+ VRM2, V_M2, f64, FPR64>;
def VF64M4: GroupVTypeInfo<vfloat64m4_t, vfloat64m1_t, vbool16_t, 64,
- VRM4, V_M4, f64, FPR64>;
+ VRM4, V_M4, f64, FPR64>;
def VF64M8: GroupVTypeInfo<vfloat64m8_t, vfloat64m1_t, vbool8_t, 64,
- VRM8, V_M8, f64, FPR64>;
+ VRM8, V_M8, f64, FPR64>;
}
}
}
@@ -423,13 +419,14 @@ def RISCVVPseudosTable : GenericTable {
def RISCVVIntrinsicsTable : GenericTable {
let FilterClass = "RISCVVIntrinsic";
let CppTypeName = "RISCVVIntrinsicInfo";
- let Fields = ["IntrinsicID", "SplatOperand"];
+ let Fields = ["IntrinsicID", "SplatOperand", "VLOperand"];
let PrimaryKey = ["IntrinsicID"];
let PrimaryKeyName = "getRISCVVIntrinsicInfo";
}
-class RISCVVLE<bit M, bit Str, bit F, bits<3> S, bits<3> L> {
+class RISCVVLE<bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> {
bits<1> Masked = M;
+ bits<1> IsTU = TU;
bits<1> Strided = Str;
bits<1> FF = F;
bits<3> Log2SEW = S;
@@ -440,8 +437,8 @@ class RISCVVLE<bit M, bit Str, bit F, bits<3> S, bits<3> L> {
def RISCVVLETable : GenericTable {
let FilterClass = "RISCVVLE";
let CppTypeName = "VLEPseudo";
- let Fields = ["Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
- let PrimaryKey = ["Masked", "Strided", "FF", "Log2SEW", "LMUL"];
+ let Fields = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
+ let PrimaryKey = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"];
let PrimaryKeyName = "getVLEPseudo";
}
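// Illustrative only: the SearchableTables backend emits a C++ lookup named by
// PrimaryKeyName and keyed on the PrimaryKey fields in the order listed above,
// so with the new IsTU key a caller would look roughly like the sketch below
// (variable names and parameter types are assumptions; the types are whatever
// the backend generates for bits<1>/bits<3> fields):
//
//   const RISCV::VLEPseudo *P =
//       RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false,
//                           Log2SEW, static_cast<unsigned>(LMUL));
//   assert(P && "unexpected EEW/LMUL combination");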
@@ -461,8 +458,9 @@ def RISCVVSETable : GenericTable {
let PrimaryKeyName = "getVSEPseudo";
}
-class RISCVVLX_VSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
+class RISCVVLX_VSX<bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> {
bits<1> Masked = M;
+ bits<1> IsTU = TU;
bits<1> Ordered = O;
bits<3> Log2SEW = S;
bits<3> LMUL = L;
@@ -470,15 +468,15 @@ class RISCVVLX_VSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
Pseudo Pseudo = !cast<Pseudo>(NAME);
}
-class RISCVVLX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> :
- RISCVVLX_VSX<M, O, S, L, IL>;
+class RISCVVLX<bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> :
+ RISCVVLX_VSX<M, TU, O, S, L, IL>;
class RISCVVSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> :
- RISCVVLX_VSX<M, O, S, L, IL>;
+ RISCVVLX_VSX<M, /*TU*/0, O, S, L, IL>;
class RISCVVLX_VSXTable : GenericTable {
let CppTypeName = "VLX_VSXPseudo";
- let Fields = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
- let PrimaryKey = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
+ let Fields = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
+ let PrimaryKey = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
}
def RISCVVLXTable : RISCVVLX_VSXTable {
@@ -583,10 +581,11 @@ class PseudoToVInst<string PseudoInst> {
!subst("_B64", "",
!subst("_MASK", "",
!subst("_TIED", "",
+ !subst("_TU", "",
!subst("F16", "F",
!subst("F32", "F",
!subst("F64", "F",
- !subst("Pseudo", "", PseudoInst))))))))))))))))))));
+ !subst("Pseudo", "", PseudoInst)))))))))))))))))))));
}
// The destination vector register group for a masked vector instruction cannot
@@ -632,7 +631,7 @@ class VPseudoUSLoadNoMask<VReg RetClass, int EEW, bit isFF> :
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -642,13 +641,29 @@ class VPseudoUSLoadNoMask<VReg RetClass, int EEW, bit isFF> :
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoUSLoadNoMaskTU<VReg RetClass, int EEW, bit isFF> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$dest, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
+ let Constraints = "$rd = $dest";
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
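// Illustrative comparison: the only differences from VPseudoUSLoadNoMask above
// are the extra destination-valued input and the register tie,
//
//   plain: (outs RetClass:$rd) (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew)
//   TU:    (outs RetClass:$rd) (ins RetClass:$dest, GPR:$rs1, AVL:$vl,
//                                   ixlenimm:$sew),  Constraints = "$rd = $dest"
//
// so the previous value of the destination is read and its tail elements are
// left undisturbed, which is presumably what the _TU suffix stands for.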
class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -664,7 +679,7 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
Pseudo<(outs RetClass:$rd),
(ins GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -674,13 +689,29 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoSLoadNoMaskTU<VReg RetClass, int EEW>:
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$dest, GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
+ let Constraints = "$rd = $dest";
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
class VPseudoSLoadMask<VReg RetClass, int EEW>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1, GPR:$rs2,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -695,9 +726,10 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered, bit EarlyClobber>:
Pseudo<(outs RetClass:$rd),
- (ins GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins GPR:$rs1, IdxClass:$rs2, AVL:$vl,
+ ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVLX</*Masked*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLX</*Masked*/0, /*TU*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -708,6 +740,24 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoILoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
+ bit Ordered, bit EarlyClobber>:
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$dest, GPR:$rs1, IdxClass:$rs2, AVL:$vl,
+ ixlenimm:$sew),[]>,
+ RISCVVPseudo,
+ RISCVVLX</*Masked*/0, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let HasMergeOp = 1;
+ let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest");
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered, bit EarlyClobber>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
@@ -715,7 +765,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
GPR:$rs1, IdxClass:$rs2,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLX</*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLX</*Masked*/1, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -932,6 +982,9 @@ class VPseudoBinaryNoMask<VReg RetClass,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+// Special version of VPseudoBinaryNoMask where we pretend the first source is
+// tied to the destination.
+// This allows maskedoff and rs2 to be the same register.
class VPseudoTiedBinaryNoMask<VReg RetClass,
DAGOperand Op2Class,
string Constraint> :
@@ -1083,6 +1136,30 @@ class VPseudoBinaryCarryIn<VReg RetClass,
let VLMul = MInfo.value;
}
+class VPseudoTiedBinaryCarryIn<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ bit CarryIn,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ !if(CarryIn,
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
+ ixlenimm:$sew),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 0;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ let VLMul = MInfo.value;
+}
+
class VPseudoTernaryNoMask<VReg RetClass,
RegisterClass Op1Class,
DAGOperand Op2Class,
@@ -1323,6 +1400,9 @@ multiclass VPseudoUSLoad {
def "E" # eew # "_V_" # LInfo :
VPseudoUSLoadNoMask<vreg, eew, false>,
VLESched<eew>;
+ def "E" # eew # "_V_" # LInfo # "_TU":
+ VPseudoUSLoadNoMaskTU<vreg, eew, false>,
+ VLESched<eew>;
def "E" # eew # "_V_" # LInfo # "_MASK" :
VPseudoUSLoadMask<vreg, eew, false>,
VLESched<eew>;
@@ -1340,6 +1420,9 @@ multiclass VPseudoFFLoad {
def "E" # eew # "FF_V_" # LInfo :
VPseudoUSLoadNoMask<vreg, eew, true>,
VLFSched<eew>;
+ def "E" # eew # "FF_V_" # LInfo # "_TU":
+ VPseudoUSLoadNoMaskTU<vreg, eew, true>,
+ VLFSched<eew>;
def "E" # eew # "FF_V_" # LInfo # "_MASK" :
VPseudoUSLoadMask<vreg, eew, true>,
VLFSched<eew>;
@@ -1364,6 +1447,8 @@ multiclass VPseudoSLoad {
let VLMul = lmul.value in {
def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask<vreg, eew>,
VLSSched<eew>;
+ def "E" # eew # "_V_" # LInfo # "_TU": VPseudoSLoadNoMaskTU<vreg, eew>,
+ VLSSched<eew>;
def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg, eew>,
VLSSched<eew>;
}
@@ -1390,6 +1475,9 @@ multiclass VPseudoILoad<bit Ordered> {
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
VLXSched<eew, Order>;
+ def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_TU":
+ VPseudoILoadNoMaskTU<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
+ VLXSched<eew, Order>;
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
VLXSched<eew, Order>;
@@ -1504,7 +1592,7 @@ multiclass VPseudoVSFS_M {
}
multiclass VPseudoVID_V {
- foreach m = MxList.m in {
+ foreach m = MxList in {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>,
Sched<[WriteVMIdxV, ReadVMask]>;
@@ -1524,7 +1612,7 @@ multiclass VPseudoNullaryPseudoM <string BaseInst> {
multiclass VPseudoVIOT_M {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxList.m in {
+ foreach m = MxList in {
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
@@ -1535,7 +1623,7 @@ multiclass VPseudoVIOT_M {
}
multiclass VPseudoVCPR_V {
- foreach m = MxList.m in {
+ foreach m = MxList in {
let VLMul = m.value in
def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>;
@@ -1596,12 +1684,18 @@ multiclass VPseudoTiedBinary<VReg RetClass,
}
multiclass VPseudoBinaryV_VV<string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
+ defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
+}
+
+// Similar to VPseudoBinaryV_VV, but uses MxListF.
+multiclass VPseudoBinaryFV_VV<string Constraint = ""> {
+ foreach m = MxListF in
defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
}
multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> {
- foreach m = MxList.m in {
+ foreach m = MxList in {
foreach sew = EEWList in {
defvar octuple_lmul = m.octuple;
// emul = lmul * eew / sew
@@ -1617,38 +1711,38 @@ multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> {
}
multiclass VPseudoBinaryV_VX<string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>;
}
multiclass VPseudoVSLD1_VX<string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>,
Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>;
}
multiclass VPseudoBinaryV_VF<string Constraint = ""> {
- foreach m = MxList.m in
- foreach f = FPList.fpinfo in
+ foreach f = FPList in
+ foreach m = f.MxList in
defm "_V" # f.FX : VPseudoBinary<m.vrclass, m.vrclass,
f.fprclass, m, Constraint>;
}
multiclass VPseudoVSLD1_VF<string Constraint = ""> {
- foreach m = MxList.m in
- foreach f = FPList.fpinfo in
+ foreach f = FPList in
+ foreach m = f.MxList in
defm "_V" # f.FX :
VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>,
Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>;
}
multiclass VPseudoBinaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
defm _VI : VPseudoBinary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
multiclass VPseudoVALU_MM {
- foreach m = MxList.m in
+ foreach m = MxList in
let VLMul = m.value in {
def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">,
Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
@@ -1662,28 +1756,28 @@ multiclass VPseudoVALU_MM {
// * The destination EEW is greater than the source EEW, the source EMUL is
// at least 1, and the overlap is in the highest-numbered part of the
// destination register group is legal. Otherwise, it is illegal.
-multiclass VPseudoBinaryW_VV {
- foreach m = MxListW.m in
+multiclass VPseudoBinaryW_VV<list<LMULInfo> mxlist = MxListW> {
+ foreach m = mxlist in
defm _VV : VPseudoBinary<m.wvrclass, m.vrclass, m.vrclass, m,
"@earlyclobber $rd">;
}
multiclass VPseudoBinaryW_VX {
- foreach m = MxListW.m in
+ foreach m = MxListW in
defm "_VX" : VPseudoBinary<m.wvrclass, m.vrclass, GPR, m,
"@earlyclobber $rd">;
}
multiclass VPseudoBinaryW_VF {
- foreach m = MxListW.m in
- foreach f = FPListW.fpinfo in
+ foreach f = FPListW in
+ foreach m = f.MxList in
defm "_V" # f.FX : VPseudoBinary<m.wvrclass, m.vrclass,
f.fprclass, m,
"@earlyclobber $rd">;
}
-multiclass VPseudoBinaryW_WV {
- foreach m = MxListW.m in {
+multiclass VPseudoBinaryW_WV<list<LMULInfo> mxlist = MxListW> {
+ foreach m = mxlist in {
defm _WV : VPseudoBinary<m.wvrclass, m.wvrclass, m.vrclass, m,
"@earlyclobber $rd">;
defm _WV : VPseudoTiedBinary<m.wvrclass, m.vrclass, m,
@@ -1692,13 +1786,13 @@ multiclass VPseudoBinaryW_WV {
}
multiclass VPseudoBinaryW_WX {
- foreach m = MxListW.m in
+ foreach m = MxListW in
defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m>;
}
multiclass VPseudoBinaryW_WF {
- foreach m = MxListW.m in
- foreach f = FPListW.fpinfo in
+ foreach f = FPListW in
+ foreach m = f.MxList in
defm "_W" # f.FX : VPseudoBinary<m.wvrclass, m.wvrclass,
f.fprclass, m>;
}
@@ -1709,19 +1803,19 @@ multiclass VPseudoBinaryW_WF {
// "The destination EEW is smaller than the source EEW and the overlap is in the
// lowest-numbered part of the source register group."
multiclass VPseudoBinaryV_WV {
- foreach m = MxListW.m in
+ foreach m = MxListW in
defm _WV : VPseudoBinary<m.vrclass, m.wvrclass, m.vrclass, m,
!if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
}
multiclass VPseudoBinaryV_WX {
- foreach m = MxListW.m in
+ foreach m = MxListW in
defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m,
!if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
}
multiclass VPseudoBinaryV_WI {
- foreach m = MxListW.m in
+ foreach m = MxListW in
defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m,
!if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
}
@@ -1731,7 +1825,7 @@ multiclass VPseudoBinaryV_WI {
// For vadc and vsbc, CarryIn == 1 and CarryOut == 0
multiclass VPseudoBinaryV_VM<bit CarryOut = 0, bit CarryIn = 1,
string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
@@ -1739,9 +1833,19 @@ multiclass VPseudoBinaryV_VM<bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, m.vrclass, m, CarryIn, Constraint>;
}
+multiclass VPseudoTiedBinaryV_VM<bit CarryOut = 0, bit CarryIn = 1,
+ string Constraint = ""> {
+ foreach m = MxList in
+ def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" :
+ VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
+ !if(!and(CarryIn, !not(CarryOut)),
+ GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+ m.vrclass, m.vrclass, m, CarryIn, Constraint>;
+}
+
multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
@@ -1749,18 +1853,34 @@ multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, GPR, m, CarryIn, Constraint>;
}
+multiclass VPseudoTiedBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
+ string Constraint = ""> {
+ foreach m = MxList in
+ def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU":
+ VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
+ !if(!and(CarryIn, !not(CarryOut)),
+ GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+ m.vrclass, GPR, m, CarryIn, Constraint>;
+}
+
multiclass VPseudoVMRG_FM {
- foreach m = MxList.m in
- foreach f = FPList.fpinfo in
+ foreach f = FPList in
+ foreach m = f.MxList in {
def "_V" # f.FX # "M_" # m.MX :
VPseudoBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">,
Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
+ // Tied version to allow codegen control over the tail elements
+ def "_V" # f.FX # "M_" # m.MX # "_TU":
+ VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">,
+ Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
+ }
}
multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX :
VPseudoBinaryCarryIn<!if(CarryOut, VR,
!if(!and(CarryIn, !not(CarryOut)),
@@ -1768,8 +1888,18 @@ multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, simm5, m, CarryIn, Constraint>;
}
+multiclass VPseudoTiedBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
+ string Constraint = ""> {
+ foreach m = MxList in
+ def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU":
+ VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
+ !if(!and(CarryIn, !not(CarryOut)),
+ GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+ m.vrclass, simm5, m, CarryIn, Constraint>;
+}
+
multiclass VPseudoUnaryVMV_V_X_I {
- foreach m = MxList.m in {
+ foreach m = MxList in {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>,
Sched<[WriteVIMovV, ReadVIMovV]>;
@@ -1782,8 +1912,8 @@ multiclass VPseudoUnaryVMV_V_X_I {
}
multiclass VPseudoVMV_F {
- foreach m = MxList.m in {
- foreach f = FPList.fpinfo in {
+ foreach f = FPList in {
+ foreach m = f.MxList in {
let VLMul = m.value in {
def "_" # f.FX # "_" # m.MX :
VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>,
@@ -1794,7 +1924,7 @@ multiclass VPseudoVMV_F {
}
multiclass VPseudoVCLS_V {
- foreach m = MxList.m in {
+ foreach m = MxListF in {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
@@ -1805,7 +1935,7 @@ multiclass VPseudoVCLS_V {
}
multiclass VPseudoVSQR_V {
- foreach m = MxList.m in {
+ foreach m = MxListF in {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
@@ -1816,7 +1946,7 @@ multiclass VPseudoVSQR_V {
}
multiclass VPseudoVRCP_V {
- foreach m = MxList.m in {
+ foreach m = MxListF in {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
@@ -1828,7 +1958,7 @@ multiclass VPseudoVRCP_V {
multiclass PseudoVEXT_VF2 {
defvar constraints = "@earlyclobber $rd";
- foreach m = MxListVF2.m in
+ foreach m = MxListVF2 in
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>,
@@ -1842,7 +1972,7 @@ multiclass PseudoVEXT_VF2 {
multiclass PseudoVEXT_VF4 {
defvar constraints = "@earlyclobber $rd";
- foreach m = MxListVF4.m in
+ foreach m = MxListVF4 in
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>,
@@ -1856,7 +1986,7 @@ multiclass PseudoVEXT_VF4 {
multiclass PseudoVEXT_VF8 {
defvar constraints = "@earlyclobber $rd";
- foreach m = MxListVF8.m in
+ foreach m = MxListVF8 in
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>,
@@ -1879,29 +2009,29 @@ multiclass PseudoVEXT_VF8 {
// lowest-numbered part of the source register group".
// With LMUL<=1 the source and dest occupy a single register so any overlap
// is in the lowest-numbered part.
-multiclass VPseudoBinaryM_VV {
- foreach m = MxList.m in
+multiclass VPseudoBinaryM_VV<list<LMULInfo> mxlist = MxList> {
+ foreach m = mxlist in
defm _VV : VPseudoBinaryM<VR, m.vrclass, m.vrclass, m,
!if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
}
multiclass VPseudoBinaryM_VX {
- foreach m = MxList.m in
+ foreach m = MxList in
defm "_VX" :
VPseudoBinaryM<VR, m.vrclass, GPR, m,
!if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
}
multiclass VPseudoBinaryM_VF {
- foreach m = MxList.m in
- foreach f = FPList.fpinfo in
+ foreach f = FPList in
+ foreach m = f.MxList in
defm "_V" # f.FX :
VPseudoBinaryM<VR, m.vrclass, f.fprclass, m,
!if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
}
multiclass VPseudoBinaryM_VI {
- foreach m = MxList.m in
+ foreach m = MxList in
defm _VI : VPseudoBinaryM<VR, m.vrclass, simm5, m,
!if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>;
}
@@ -1995,14 +2125,14 @@ multiclass VPseudoVDIV_VV_VX {
}
multiclass VPseudoVFMUL_VV_VF {
- defm "" : VPseudoBinaryV_VV,
+ defm "" : VPseudoBinaryFV_VV,
Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>;
defm "" : VPseudoBinaryV_VF,
Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>;
}
multiclass VPseudoVFDIV_VV_VF {
- defm "" : VPseudoBinaryV_VV,
+ defm "" : VPseudoBinaryFV_VV,
Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>;
defm "" : VPseudoBinaryV_VF,
Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
@@ -2021,21 +2151,21 @@ multiclass VPseudoVALU_VV_VX {
}
multiclass VPseudoVSGNJ_VV_VF {
- defm "" : VPseudoBinaryV_VV,
+ defm "" : VPseudoBinaryFV_VV,
Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>;
defm "" : VPseudoBinaryV_VF,
Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>;
}
multiclass VPseudoVMAX_VV_VF {
- defm "" : VPseudoBinaryV_VV,
+ defm "" : VPseudoBinaryFV_VV,
Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
defm "" : VPseudoBinaryV_VF,
Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
}
multiclass VPseudoVALU_VV_VF {
- defm "" : VPseudoBinaryV_VV,
+ defm "" : VPseudoBinaryFV_VV,
Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>;
defm "" : VPseudoBinaryV_VF,
Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
@@ -2068,17 +2198,12 @@ multiclass VPseudoVWMUL_VV_VX {
}
multiclass VPseudoVWMUL_VV_VF {
- defm "" : VPseudoBinaryW_VV,
+ defm "" : VPseudoBinaryW_VV<MxListFW>,
Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>;
defm "" : VPseudoBinaryW_VF,
Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>;
}
-multiclass VPseudoBinaryW_VV_VF {
- defm "" : VPseudoBinaryW_VV;
- defm "" : VPseudoBinaryW_VF;
-}
-
multiclass VPseudoVWALU_WV_WX {
defm "" : VPseudoBinaryW_WV,
Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
@@ -2087,14 +2212,14 @@ multiclass VPseudoVWALU_WV_WX {
}
multiclass VPseudoVFWALU_VV_VF {
- defm "" : VPseudoBinaryW_VV,
+ defm "" : VPseudoBinaryW_VV<MxListFW>,
Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
defm "" : VPseudoBinaryW_VF,
Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
}
multiclass VPseudoVFWALU_WV_WF {
- defm "" : VPseudoBinaryW_WV,
+ defm "" : VPseudoBinaryW_WV<MxListFW>,
Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
defm "" : VPseudoBinaryW_WF,
Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
@@ -2107,6 +2232,13 @@ multiclass VPseudoVMRG_VM_XM_IM {
Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
defm "" : VPseudoBinaryV_IM,
Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
+ // Tied versions to allow codegen control over the tail elements
+ defm "" : VPseudoTiedBinaryV_VM,
+ Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>;
+ defm "" : VPseudoTiedBinaryV_XM,
+ Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
+ defm "" : VPseudoTiedBinaryV_IM,
+ Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
}
multiclass VPseudoVCALU_VM_XM_IM {
@@ -2199,56 +2331,57 @@ multiclass VPseudoTernaryWithPolicy<VReg RetClass,
}
}
-multiclass VPseudoTernaryV_VV_AAXA<string Constraint = ""> {
- foreach m = MxList.m in {
+multiclass VPseudoTernaryV_VV_AAXA<string Constraint = "",
+ list<LMULInfo> mxlist = MxList> {
+ foreach m = mxlist in {
defm _VV : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, m.vrclass, m,
Constraint, /*Commutable*/1>;
}
}
multiclass VPseudoTernaryV_VX<string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
defm _VX : VPseudoTernary<m.vrclass, m.vrclass, GPR, m, Constraint>;
}
multiclass VPseudoTernaryV_VX_AAXA<string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
defm "_VX" : VPseudoTernaryWithPolicy<m.vrclass, GPR, m.vrclass, m,
Constraint, /*Commutable*/1>;
}
multiclass VPseudoTernaryV_VF_AAXA<string Constraint = ""> {
- foreach m = MxList.m in
- foreach f = FPList.fpinfo in
+ foreach f = FPList in
+ foreach m = f.MxList in
defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.vrclass, f.fprclass,
m.vrclass, m, Constraint,
/*Commutable*/1>;
}
-multiclass VPseudoTernaryW_VV {
+multiclass VPseudoTernaryW_VV<list<LMULInfo> mxlist = MxListW> {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in
+ foreach m = mxlist in
defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
constraint>;
}
multiclass VPseudoTernaryW_VX {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in
+ foreach m = MxListW in
defm "_VX" : VPseudoTernaryWithPolicy<m.wvrclass, GPR, m.vrclass, m,
constraint>;
}
multiclass VPseudoTernaryW_VF {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in
- foreach f = FPListW.fpinfo in
+ foreach f = FPListW in
+ foreach m = f.MxList in
defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.wvrclass, f.fprclass,
m.vrclass, m, constraint>;
}
multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
- foreach m = MxList.m in
+ foreach m = MxList in
defm _VI : VPseudoTernary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
@@ -2260,7 +2393,7 @@ multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
}
multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
- defm "" : VPseudoTernaryV_VV_AAXA<Constraint>,
+ defm "" : VPseudoTernaryV_VV_AAXA<Constraint, MxListF>,
Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>;
defm "" : VPseudoTernaryV_VF_AAXA<Constraint>,
Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>;
@@ -2286,7 +2419,7 @@ multiclass VPseudoVWMAC_VX {
}
multiclass VPseudoVWMAC_VV_VF {
- defm "" : VPseudoTernaryW_VV,
+ defm "" : VPseudoTernaryW_VV<MxListFW>,
Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>;
defm "" : VPseudoTernaryW_VF,
Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>;
@@ -2309,7 +2442,7 @@ multiclass VPseudoVCMPM_VV_VX {
}
multiclass VPseudoVCMPM_VV_VF {
- defm "" : VPseudoBinaryM_VV,
+ defm "" : VPseudoBinaryM_VV<MxListF>,
Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
defm "" : VPseudoBinaryM_VF,
Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
@@ -2328,35 +2461,35 @@ multiclass VPseudoVCMPM_VX_VI {
}
multiclass VPseudoVRED_VS {
- foreach m = MxList.m in {
+ foreach m = MxList in {
defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV, ReadVIRedV, ReadVMask]>;
}
}
multiclass VPseudoVWRED_VS {
- foreach m = MxList.m in {
+ foreach m = MxList in {
defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVMask]>;
}
}
multiclass VPseudoVFRED_VS {
- foreach m = MxList.m in {
+ foreach m = MxListF in {
defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV, ReadVFRedV, ReadVMask]>;
}
}
multiclass VPseudoVFREDO_VS {
- foreach m = MxList.m in {
+ foreach m = MxListF in {
defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVMask]>;
}
}
multiclass VPseudoVFWRED_VS {
- foreach m = MxList.m in {
+ foreach m = MxListF in {
defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVMask]>;
}
@@ -2374,61 +2507,61 @@ multiclass VPseudoConversion<VReg RetClass,
}
multiclass VPseudoVCVTI_V {
- foreach m = MxList.m in
+ foreach m = MxListF in
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
}
multiclass VPseudoVCVTF_V {
- foreach m = MxList.m in
+ foreach m = MxListF in
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>;
}
multiclass VPseudoConversionW_V {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in
+ foreach m = MxListW in
defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>;
}
multiclass VPseudoVWCVTI_V {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxList.m[0-5] in
+ foreach m = MxListFW in
defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>;
}
multiclass VPseudoVWCVTF_V {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxList.m[0-5] in
+ foreach m = MxListW in
defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>;
}
multiclass VPseudoVWCVTD_V {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxList.m[0-5] in
+ foreach m = MxListFW in
defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>;
}
multiclass VPseudoVNCVTI_W {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxList.m[0-5] in
+ foreach m = MxListW in
defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>;
}
multiclass VPseudoVNCVTF_W {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxList.m[0-5] in
+ foreach m = MxListFW in
defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>;
}
multiclass VPseudoVNCVTD_W {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in
+ foreach m = MxListFW in
defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>;
}
@@ -3702,6 +3835,28 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
}
}
+multiclass VPatCompare_VI<string intrinsic, string inst,
+ ImmLeaf ImmType = simm5_plus1> {
+ foreach vti = AllIntegerVectors in {
+ defvar Intr = !cast<Intrinsic>(intrinsic);
+ defvar Pseudo = !cast<Instruction>(inst#"_VI_"#vti.LMul.MX);
+ def : Pat<(vti.Mask (Intr (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar ImmType:$rs2),
+ VLOpFrag)),
+ (Pseudo vti.RegClass:$rs1, (DecImm ImmType:$rs2),
+ GPR:$vl, vti.Log2SEW)>;
+ defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
+ defvar PseudoMask = !cast<Instruction>(inst#"_VI_"#vti.LMul.MX#"_MASK");
+ def : Pat<(vti.Mask (IntrMask (vti.Mask VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar ImmType:$rs2),
+ (vti.Mask V0),
+ VLOpFrag)),
+ (PseudoMask VR:$merge, vti.RegClass:$rs1, (DecImm ImmType:$rs2),
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
@@ -3741,7 +3896,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
def PseudoVRELOAD_M8 : VPseudo<VL8RE8_V, V_M8, (outs VRM8:$rs1), (ins GPR:$rs2)>;
}
-foreach lmul = MxList.m in {
+foreach lmul = MxList in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in {
@@ -3765,9 +3920,9 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
// the rs1 operand when we aren't using one of the special X0 encodings. Otherwise
// it could accidentally be made X0 by MachineIR optimizations. To satisfy the
// verifier, we also need a GPRX0 instruction for the special encodings.
-def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPRNoX0:$rs1, VTypeIOp:$vtypei), []>;
-def PseudoVSETVLIX0 : Pseudo<(outs GPR:$rd), (ins GPRX0:$rs1, VTypeIOp:$vtypei), []>;
-def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>;
+def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPRNoX0:$rs1, VTypeIOp11:$vtypei), []>;
+def PseudoVSETVLIX0 : Pseudo<(outs GPR:$rd), (ins GPRX0:$rs1, VTypeIOp11:$vtypei), []>;
+def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp10:$vtypei), []>;
}
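// Illustrative: the split into VTypeIOp11/VTypeIOp10 presumably mirrors the
// encodings, where vsetvli carries an 11-bit vtypei immediate while vsetivli
// only has room for 10 bits, e.g.
//   vsetvli  a0, a1, e32, m2, ta, mu   # rs1 AVL, 11-bit vtypei
//   vsetivli a0, 8,  e32, m2, ta, mu   # uimm5 AVL, 10-bit vtypei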
//===----------------------------------------------------------------------===//
@@ -4304,7 +4459,7 @@ defm PseudoVID : VPseudoVID_V;
let Predicates = [HasVInstructions] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- foreach m = MxList.m in {
+ foreach m = MxList in {
let VLMul = m.value in {
let HasSEWOp = 1, BaseInstr = VMV_X_S in
def PseudoVMV_X_S # "_" # m.MX:
@@ -4330,8 +4485,8 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
let Predicates = [HasVInstructionsAnyF] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- foreach m = MxList.m in {
- foreach f = FPList.fpinfo in {
+ foreach f = FPList in {
+ foreach m = f.MxList in {
let VLMul = m.value in {
let HasSEWOp = 1, BaseInstr = VFMV_F_S in
def "PseudoVFMV_" # f.FX # "_S_" # m.MX :
@@ -4452,6 +4607,30 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsrl", "PseudoVSRL", AllIntegerVectors,
defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
uimm5>;
+foreach vti = AllIntegerVectors in {
+ // Emit shift by 1 as an add since it might be faster.
+ def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$rs1),
+ (XLenVT 1), VLOpFrag)),
+ (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
+ vti.RegClass:$rs1,
+ GPR:$vl,
+ vti.Log2SEW)>;
+ def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (XLenVT 1),
+ (vti.Mask V0),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs1,
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+}
+
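// Illustrative effect of the patterns above: a vsll intrinsic with a constant
// shift amount of 1, which would otherwise select
//   vsll.vi vd, vs2, 1
// is instead selected as
//   vadd.vv vd, vs2, vs2
// on the assumption that the add form is never slower and may be faster.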
//===----------------------------------------------------------------------===//
// 12.7. Vector Narrowing Integer Right Shift Instructions
//===----------------------------------------------------------------------===//
@@ -4481,129 +4660,11 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmsge", "PseudoVMSLE", AllIntegerVectors
// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This
// avoids the user needing to know that there is no vmslt(u).vi instruction.
// Similar for vmsge(u).vx intrinsics using vmsgt(u).vi.
-foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
- VR:$merge,
- vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW)>;
+defm : VPatCompare_VI<"int_riscv_vmslt", "PseudoVMSLE">;
+defm : VPatCompare_VI<"int_riscv_vmsltu", "PseudoVMSLEU", simm5_plus1_nonzero>;
- def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
- VR:$merge,
- vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW)>;
-
- // Special cases to avoid matching vmsltu.vi 0 (always false) to
- // vmsleu.vi -1 (always true). Instead match to vmsne.vv.
- def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar 0), VLOpFrag)),
- (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
- vti.RegClass:$rs1,
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar 0),
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
- VR:$merge,
- vti.RegClass:$rs1,
- vti.RegClass:$rs1,
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW)>;
-
- def : Pat<(vti.Mask (int_riscv_vmsge (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSGT_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsge_mask (vti.Mask VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSGT_VI_"#vti.LMul.MX#"_MASK")
- VR:$merge,
- vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW)>;
-
- def : Pat<(vti.Mask (int_riscv_vmsgeu (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSGTU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsgeu_mask (vti.Mask VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSGTU_VI_"#vti.LMul.MX#"_MASK")
- VR:$merge,
- vti.RegClass:$rs1,
- (DecImm simm5_plus1:$rs2),
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW)>;
-
- // Special cases to avoid matching vmsgeu.vi 0 (always true) to
- // vmsgtu.vi -1 (always false). Instead match to vmsne.vv.
- def : Pat<(vti.Mask (int_riscv_vmsgeu (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar 0), VLOpFrag)),
- (!cast<Instruction>("PseudoVMSEQ_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
- vti.RegClass:$rs1,
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsgeu_mask (vti.Mask VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar 0),
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMSEQ_VV_"#vti.LMul.MX#"_MASK")
- VR:$merge,
- vti.RegClass:$rs1,
- vti.RegClass:$rs1,
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW)>;
-}
+defm : VPatCompare_VI<"int_riscv_vmsge", "PseudoVMSGT">;
+defm : VPatCompare_VI<"int_riscv_vmsgeu", "PseudoVMSGTU", simm5_plus1_nonzero>;
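// Worked example (illustrative): for a vmslt.vx intrinsic with a constant
// scalar imm in [-15, 16], x < imm is equivalent to x <= imm-1, so it selects
//   vmsle.vi vd, vs2, imm-1      (DecImm performs the decrement)
// and vmsge(u).vx likewise selects vmsgt(u).vi with imm-1. The unsigned forms
// use simm5_plus1_nonzero so that imm == 0 is left alone: x <u 0 is always
// false, and rewriting it to vmsleu.vi vd, vs2, -1 would be always true.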
//===----------------------------------------------------------------------===//
// 12.9. Vector Integer Min/Max Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 711ad4335ece..e452a84a9a6f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -363,6 +363,91 @@ multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
}
}
+multiclass VPatWidenBinarySDNode_VV_VX_WV_WX<SDNode op, PatFrags extop, string instruction_name> {
+ foreach vti = AllWidenableIntVectors in {
+ def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
+ (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
+ vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
+ (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.Vti.LMul.MX)
+ vti.Vti.RegClass:$rs2, GPR:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
+ (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
+ (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rs2, GPR:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenMulAddSDNode_VV<PatFrags extop1, PatFrags extop2, string instruction_name> {
+ foreach vti = AllWidenableIntVectors in {
+ def : Pat<
+ (add (vti.Wti.Vector vti.Wti.RegClass:$rd),
+ (mul_oneuse (vti.Wti.Vector (extop1 (vti.Vti.Vector vti.Vti.RegClass:$rs1))),
+ (vti.Wti.Vector (extop2 (vti.Vti.Vector vti.Vti.RegClass:$rs2))))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rd, vti.Vti.RegClass:$rs1, vti.Vti.RegClass:$rs2,
+ vti.Vti.AVL, vti.Vti.Log2SEW, TAIL_AGNOSTIC
+ )>;
+ }
+}
+multiclass VPatWidenMulAddSDNode_VX<PatFrags extop1, PatFrags extop2, string instruction_name> {
+ foreach vti = AllWidenableIntVectors in {
+ def : Pat<
+ (add (vti.Wti.Vector vti.Wti.RegClass:$rd),
+ (mul_oneuse (vti.Wti.Vector (extop1 (vti.Vti.Vector (SplatPat GPR:$rs1)))),
+ (vti.Wti.Vector (extop2 (vti.Vti.Vector vti.Vti.RegClass:$rs2))))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rd, GPR:$rs1, vti.Vti.RegClass:$rs2,
+ vti.Vti.AVL, vti.Vti.Log2SEW, TAIL_AGNOSTIC
+ )>;
+ }
+}
+
+multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
+ foreach vti = AllWidenableFloatVectors in {
+ def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
+ (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
+ vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
+ (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_V"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX)
+ vti.Vti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenBinaryFPSDNode_WV_WF<SDNode op, string instruction_name> {
+ foreach vti = AllWidenableFloatVectors in {
+ def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
+ (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
+ (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_W"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatWidenBinaryFPSDNode_VV_VF_WV_WF<SDNode op, string instruction_name> {
+ defm : VPatWidenBinaryFPSDNode_VV_VF<op, instruction_name>;
+ defm : VPatWidenBinaryFPSDNode_WV_WF<op, instruction_name>;
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -399,6 +484,15 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.Log2SEW)>;
}
+// 12.2. Vector Widening Integer Add and Subtract
+defm : VPatWidenBinarySDNode_VV_VX_WV_WX<add, sext_oneuse, "PseudoVWADD">;
+defm : VPatWidenBinarySDNode_VV_VX_WV_WX<add, zext_oneuse, "PseudoVWADDU">;
+defm : VPatWidenBinarySDNode_VV_VX_WV_WX<add, anyext_oneuse, "PseudoVWADDU">;
+
+defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, sext_oneuse, "PseudoVWSUB">;
+defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, zext_oneuse, "PseudoVWSUBU">;
+defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, anyext_oneuse, "PseudoVWSUBU">;
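// Illustrative IR (made-up names and types) for the patterns above: an add of
// two single-use sign extensions selects the widening form at the narrow LMUL,
//   %a = sext <vscale x 4 x i16> %x to <vscale x 4 x i32>
//   %b = sext <vscale x 4 x i16> %y to <vscale x 4 x i32>
//   %r = add <vscale x 4 x i32> %a, %b        ; -> vwadd.vv
// and if one operand is already wide, the _WV/_WX forms are used instead,
//   %r = add <vscale x 4 x i32> %wide, %b     ; -> vwadd.wv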
+
// 12.3. Vector Integer Extension
defm : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF2",
AllFractionableVF2IntVectors>;
@@ -513,6 +607,15 @@ foreach vti = AllIntegerVectors in {
vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
+// 12.14 Vector Widening Integer Multiply-Add Instructions
+defm : VPatWidenMulAddSDNode_VV<sext_oneuse, sext_oneuse, "PseudoVWMACC">;
+defm : VPatWidenMulAddSDNode_VX<sext_oneuse, sext_oneuse, "PseudoVWMACC">;
+defm : VPatWidenMulAddSDNode_VV<zext_oneuse, zext_oneuse, "PseudoVWMACCU">;
+defm : VPatWidenMulAddSDNode_VX<zext_oneuse, zext_oneuse, "PseudoVWMACCU">;
+defm : VPatWidenMulAddSDNode_VV<sext_oneuse, zext_oneuse, "PseudoVWMACCSU">;
+defm : VPatWidenMulAddSDNode_VX<sext_oneuse, zext_oneuse, "PseudoVWMACCSU">;
+defm : VPatWidenMulAddSDNode_VX<zext_oneuse, sext_oneuse, "PseudoVWMACCUS">;
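// Illustrative IR (made-up names): an accumulate of a single-use widening
// multiply,
//   %ae = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
//   %be = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
//   %m  = mul <vscale x 4 x i32> %ae, %be          ; single use
//   %r  = add <vscale x 4 x i32> %acc, %m          ; -> vwmacc.vv
// with the U/SU/US variants chosen by the extension kinds, as in the defm
// lines above.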
+
// 12.15. Vector Integer Merge Instructions
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1,
@@ -582,11 +685,18 @@ defm : VPatBinaryFPSDNode_VV_VF<fadd, "PseudoVFADD">;
defm : VPatBinaryFPSDNode_VV_VF<fsub, "PseudoVFSUB">;
defm : VPatBinaryFPSDNode_R_VF<fsub, "PseudoVFRSUB">;
+// 14.3. Vector Widening Floating-Point Add/Subtract Instructions
+defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF<fadd, "PseudoVFWADD">;
+defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF<fsub, "PseudoVFWSUB">;
+
// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm : VPatBinaryFPSDNode_VV_VF<fmul, "PseudoVFMUL">;
defm : VPatBinaryFPSDNode_VV_VF<fdiv, "PseudoVFDIV">;
defm : VPatBinaryFPSDNode_R_VF<fdiv, "PseudoVFRDIV">;
+// 14.5. Vector Widening Floating-Point Multiply Instructions
+defm : VPatWidenBinaryFPSDNode_VV_VF<fmul, "PseudoVFWMUL">;
+
// 14.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions.
foreach fvti = AllFloatVectors in {
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 73b97e1c3675..964f0fa54512 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -177,14 +177,13 @@ def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL",
SDTCisSameNumEltsAs<0, 3>,
SDTCisVT<4, XLenVT>]>>;
-def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL",
- SDTypeProfile<1, 4, [SDTCisVec<0>,
- SDTCisVec<1>,
- SDTCisSameNumEltsAs<0, 1>,
- SDTCVecEltisVT<1, i1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<2, 3>,
- SDTCisVT<4, XLenVT>]>>;
+def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisVT<4, XLenVT>
+]>;
+
+def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>;
+def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>;
def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
@@ -216,19 +215,20 @@ def riscv_zext_vl : SDNode<"RISCVISD::VZEXT_VL", SDT_RISCVVEXTEND_VL>;
def riscv_trunc_vector_vl : SDNode<"RISCVISD::TRUNCATE_VECTOR_VL",
SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisVec<1>,
+ SDTCisSameNumEltsAs<0, 1>,
SDTCisSameNumEltsAs<0, 2>,
SDTCVecEltisVT<2, i1>,
SDTCisVT<3, XLenVT>]>>;
-def SDT_RISCVVWMUL_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
- SDTCisSameNumEltsAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisSameNumEltsAs<1, 3>,
- SDTCVecEltisVT<3, i1>,
- SDTCisVT<4, XLenVT>]>;
-def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
-def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
+def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameNumEltsAs<1, 3>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisVT<4, XLenVT>]>;
+def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def SDTRVVVecReduce : SDTypeProfile<1, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
@@ -363,37 +363,47 @@ multiclass VPatBinaryWVL_VV_VX<SDNode vop, string instruction_name> {
}
}
-class VPatBinaryVL_VF<SDNode vop,
- string instruction_name,
- ValueType result_type,
- ValueType vop_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg vop_reg_class,
- RegisterClass scalar_reg_class> :
- Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
- (vop_type (SplatFPOp scalar_reg_class:$rs2)),
- (mask_type true_mask),
- VLOpFrag)),
+multiclass VPatBinaryVL_VF<SDNode vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType vop_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg vop_reg_class,
+ RegisterClass scalar_reg_class> {
+ def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
+ (vop_type (SplatFPOp scalar_reg_class:$rs2)),
+ (mask_type true_mask),
+ VLOpFrag)),
(!cast<Instruction>(instruction_name#"_"#vlmul.MX)
vop_reg_class:$rs1,
scalar_reg_class:$rs2,
GPR:$vl, sew)>;
+ def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
+ (vop_type (SplatFPOp scalar_reg_class:$rs2)),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#vlmul.MX#"_MASK")
+ (result_type (IMPLICIT_DEF)),
+ vop_reg_class:$rs1,
+ scalar_reg_class:$rs2,
+ (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
+}
multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
defm : VPatBinaryVL_VV<vop, instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
vti.LMul, vti.RegClass>;
- def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.ScalarRegClass>;
+ defm : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.ScalarRegClass>;
}
}
multiclass VPatBinaryFPVL_R_VF<SDNode vop, string instruction_name> {
- foreach fvti = AllFloatVectors in
+ foreach fvti = AllFloatVectors in {
def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
fvti.RegClass:$rs1,
(fvti.Mask true_mask),
@@ -401,6 +411,15 @@ multiclass VPatBinaryFPVL_R_VF<SDNode vop, string instruction_name> {
(!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
+ fvti.RegClass:$rs1,
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
multiclass VPatIntegerSetCCVL_VV<VTypeInfo vti, string instruction_name,
@@ -602,6 +621,47 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
}
}
+multiclass VPatBinarySDNodeExt_V_WV<SDNode op, PatFrags extop, string instruction_name> {
+ foreach vti = AllWidenableIntVectors in {
+ def : Pat<
+ (vti.Vti.Vector
+ (riscv_trunc_vector_vl
+ (op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
+ (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
+ (riscv_vmset_vl VLMax),
+ VLMax)),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ }
+}
+
+multiclass VPatBinarySDNodeExt_V_WX<SDNode op, PatFrags extop, string instruction_name> {
+ foreach vti = AllWidenableIntVectors in {
+ def : Pat<
+ (vti.Vti.Vector
+ (riscv_trunc_vector_vl
+ (op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
+ (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
+ (riscv_vmset_vl VLMax),
+ VLMax)),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.Vti.LMul.MX)
+ vti.Wti.RegClass:$rs2, GPR:$rs1,
+ vti.Vti.AVL, vti.Vti.Log2SEW)>;
+ }
+}
+
+
+multiclass VPatBinarySDNode_V_WV<SDNode op, string instruction_name> {
+ defm : VPatBinarySDNodeExt_V_WV<op, sext_oneuse, instruction_name>;
+ defm : VPatBinarySDNodeExt_V_WV<op, zext_oneuse, instruction_name>;
+}
+
+multiclass VPatBinarySDNode_V_WX<SDNode op, string instruction_name> {
+ defm : VPatBinarySDNodeExt_V_WX<op, sext_oneuse, instruction_name>;
+ defm : VPatBinarySDNodeExt_V_WX<op, zext_oneuse, instruction_name>;
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -661,6 +721,9 @@ foreach vti = AllIntegerVectors in {
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
+// 12.2. Vector Widening Integer Add/Subtract
+defm : VPatBinaryWVL_VV_VX<riscv_vwaddu_vl, "PseudoVWADDU">;
+
// 12.3. Vector Integer Extension
defm : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF2",
AllFractionableVF2IntVectors>;
@@ -696,14 +759,19 @@ foreach vti = AllIntegerVectors in {
}
// 12.7. Vector Narrowing Integer Right Shift Instructions
+defm : VPatBinarySDNode_V_WV<srl, "PseudoVNSRL">;
+defm : VPatBinarySDNode_V_WX<srl, "PseudoVNSRL">;
+defm : VPatBinarySDNode_V_WV<sra, "PseudoVNSRA">;
+defm : VPatBinarySDNode_V_WX<sra, "PseudoVNSRA">;
+
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1),
(vti.Mask true_mask),
VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX)
- wti.RegClass:$rs1, 0, GPR:$vl, vti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVNSRL_WX_"#vti.LMul.MX)
+ wti.RegClass:$rs1, X0, GPR:$vl, vti.Log2SEW)>;
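// Illustrative: with this change a plain truncate of a wide vector selects
//   vnsrl.wx vd, vs2, x0        (narrowing shift by zero)
// rather than vnsrl.wi with an immediate 0; the x0 form is also what the
// V spec's vncvt.x.x.w alias expands to.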
def : Pat<(vti.Vector
(riscv_trunc_vector_vl
@@ -760,6 +828,8 @@ foreach vti = AllIntegerVectors in {
defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSNE", SETNE, SETNE>;
defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGT", SETGT, SETLT>;
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLE", SETLT,
SplatPat_simm5_plus1>;
@@ -905,6 +975,30 @@ foreach vti = AllIntegerVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+ (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+ (SplatPat_simm5 simm5:$rs1),
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
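// Illustrative (made-up values): llvm.vp.merge keeps the false operand both
// where the mask is 0 and past %evl, e.g.
//   %r = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m,
//            <vscale x 2 x i32> %t, <vscale x 2 x i32> %f, i32 %evl)
// which is why the _TU patterns above pass $rs2 (the false operand) twice:
// once as the vmerge source and once as the tied merge operand, so the tail
// elements are left holding %f.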
// 12.16. Vector Integer Move Instructions
@@ -1152,6 +1246,31 @@ foreach fvti = AllFloatVectors in {
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU")
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU")
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU")
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+
// 14.16. Vector Floating-Point Move Instruction
// If we're splatting fpimm0, use vmv.v.x vd, x0.
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
@@ -1368,6 +1487,11 @@ let Predicates = [HasVInstructionsAnyF] in {
// 17.2. Floating-Point Scalar Move Instructions
foreach vti = AllFloatVectors in {
def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
+ (vti.Scalar (fpimm0)),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
+ vti.RegClass:$merge, X0, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
vti.ScalarRegClass:$rs1,
VLOpFrag)),
(!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_"#vti.LMul.MX)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 7eb8ae7d4193..db3f5851879a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -12,14 +12,22 @@
// Zbb - 1.0
// Zbc - 1.0
// Zbs - 1.0
-// Zbe - 0.93
-// Zbf - 0.93
-// Zbm - 0.93
-// Zbp - 0.93
-// Zbr - 0.93
-// Zbt - 0.93
-// This version is still experimental as the Bitmanip extensions haven't been
-// ratified yet.
+// Zbe - 0.93 *experimental
+// Zbf - 0.93 *experimental
+// Zbm - 0.93 *experimental
+// Zbp - 0.93 *experimental
+// Zbr - 0.93 *experimental
+// Zbt - 0.93 *experimental
+//
+// The experimental extensions appeared in an earlier draft of the Bitmanip
+// extensions. They have not been ratified and are subject to change.
+//
+// This file also describes RISC-V instructions from the Zbk* extensions in
+// Cryptography Extensions Volume I: Scalar & Entropy Source Instructions,
+// versions:
+// Zbkb - 1.0
+// Zbkc - 1.0
+// Zbkx - 1.0
//
//===----------------------------------------------------------------------===//
@@ -43,6 +51,8 @@ def riscv_shfl : SDNode<"RISCVISD::SHFL", SDTIntBinOp>;
def riscv_shflw : SDNode<"RISCVISD::SHFLW", SDT_RISCVIntBinOpW>;
def riscv_unshfl : SDNode<"RISCVISD::UNSHFL", SDTIntBinOp>;
def riscv_unshflw: SDNode<"RISCVISD::UNSHFLW",SDT_RISCVIntBinOpW>;
+def riscv_bfp : SDNode<"RISCVISD::BFP", SDTIntBinOp>;
+def riscv_bfpw : SDNode<"RISCVISD::BFPW", SDT_RISCVIntBinOpW>;
def riscv_bcompress : SDNode<"RISCVISD::BCOMPRESS", SDTIntBinOp>;
def riscv_bcompressw : SDNode<"RISCVISD::BCOMPRESSW", SDT_RISCVIntBinOpW>;
def riscv_bdecompress : SDNode<"RISCVISD::BDECOMPRESS", SDTIntBinOp>;
@@ -309,14 +319,14 @@ class RVBTernaryImm5<bits<2> funct2, bits<3> funct3, RISCVOpcode opcode,
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def ANDN : ALU_rr<0b0100000, 0b111, "andn">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
def ORN : ALU_rr<0b0100000, 0b110, "orn">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
def XNOR : ALU_rr<0b0100000, 0b100, "xnor">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
let Predicates = [HasStdExtZba] in {
def SH1ADD : ALU_rr<0b0010000, 0b010, "sh1add">,
@@ -327,18 +337,22 @@ def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">,
Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
} // Predicates = [HasStdExtZba]
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def ROL : ALU_rr<0b0110000, 0b001, "rol">,
Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
def ROR : ALU_rr<0b0110000, 0b101, "ror">,
Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
let Predicates = [HasStdExtZbs] in {
-def BCLR : ALU_rr<0b0100100, 0b001, "bclr">, Sched<[]>;
-def BSET : ALU_rr<0b0010100, 0b001, "bset">, Sched<[]>;
-def BINV : ALU_rr<0b0110100, 0b001, "binv">, Sched<[]>;
-def BEXT : ALU_rr<0b0100100, 0b101, "bext">, Sched<[]>;
+def BCLR : ALU_rr<0b0100100, 0b001, "bclr">,
+ Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
+def BSET : ALU_rr<0b0010100, 0b001, "bset">,
+ Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
+def BINV : ALU_rr<0b0110100, 0b001, "binv">,
+ Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
+def BEXT : ALU_rr<0b0100100, 0b101, "bext">,
+ Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbp] in {
@@ -346,21 +360,28 @@ def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
+let Predicates = [HasStdExtZbpOrZbkx] in {
+def XPERMN : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>;
+def XPERMB : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>;
+} // Predicates = [HasStdExtZbpOrZbkx]
+
let Predicates = [HasStdExtZbp] in {
-def XPERMN : ALU_rr<0b0010100, 0b010, "xperm.n">, Sched<[]>;
-def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>;
def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
-let Predicates = [HasStdExtZbbOrZbp] in
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in
def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">,
Sched<[WriteRotateImm, ReadRotateImm]>;
let Predicates = [HasStdExtZbs] in {
-def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">, Sched<[]>;
-def BSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "bseti">, Sched<[]>;
-def BINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "binvi">, Sched<[]>;
-def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">, Sched<[]>;
+def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">,
+ Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
+def BSETI : RVBShift_ri<0b00101, 0b001, OPC_OP_IMM, "bseti">,
+ Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
+def BINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "binvi">,
+ Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
+def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">,
+ Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
} // Predicates = [HasStdExtZbs]
let Predicates = [HasStdExtZbp] in {
@@ -428,11 +449,17 @@ def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
Sched<[]>;
let Predicates = [HasStdExtZbc] in {
-def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">, Sched<[]>;
-def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">, Sched<[]>;
-def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">, Sched<[]>;
+def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">,
+ Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
} // Predicates = [HasStdExtZbc]
+let Predicates = [HasStdExtZbcOrZbkc] in {
+def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">,
+ Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
+def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">,
+ Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
+} // Predicates = [HasStdExtZbcOrZbkc]
+
let Predicates = [HasStdExtZbb] in {
def MIN : ALU_rr<0b0000101, 0b100, "min">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
@@ -456,11 +483,13 @@ def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>;
def BCOMPRESS : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>;
} // Predicates = [HasStdExtZbe]
-let Predicates = [HasStdExtZbp] in {
+let Predicates = [HasStdExtZbpOrZbkb] in {
def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>;
-def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
+} // Predicates = [HasStdExtZbpOrZbkb]
+
+let Predicates = [HasStdExtZbp] in
+def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
let Predicates = [HasStdExtZbm, IsRV64] in {
def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>;
@@ -468,7 +497,8 @@ def BMATXOR : ALU_rr<0b0100100, 0b011, "bmatxor">, Sched<[]>;
} // Predicates = [HasStdExtZbm, IsRV64]
let Predicates = [HasStdExtZbf] in
-def BFP : ALU_rr<0b0100100, 0b111, "bfp">, Sched<[]>;
+def BFP : ALU_rr<0b0100100, 0b111, "bfp">,
+ Sched<[WriteBFP, ReadBFP, ReadBFP]>;
let Predicates = [HasStdExtZbp] in {
def SHFLI : RVBShfl_ri<0b0000100, 0b001, OPC_OP_IMM, "shfli">, Sched<[]>;
@@ -488,7 +518,7 @@ def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">,
Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
@@ -504,7 +534,7 @@ let Predicates = [HasStdExtZbp, IsRV64] in {
def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
} // Predicates = [HasStdExtZbp, IsRV64]
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in
def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
Sched<[WriteRotateImm32, ReadRotateImm32]>;
@@ -543,13 +573,15 @@ def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
} // Predicates = [HasStdExtZbe, IsRV64]
-let Predicates = [HasStdExtZbp, IsRV64] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
+
+let Predicates = [HasStdExtZbp, IsRV64] in
def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
let Predicates = [HasStdExtZbf, IsRV64] in
-def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">, Sched<[]>;
+def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">,
+ Sched<[WriteBFP32, ReadBFP32, ReadBFP32]>;
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -576,30 +608,30 @@ def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd),
// causes diagnostics to suggest that Zbp rather than Zbb is required for rev8
// or gorci. Since Zbb is closer to being finalized than Zbp this will be
// misleading to users.
-let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def REV8_RV32 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
- "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> {
- let imm12 = { 0b01101, 0b0011000 };
-}
-} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32] in {
+def REV8_RV32 : RVBUnary<0b0110100, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+ Sched<[WriteREV8, ReadREV8]>;
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32]
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def REV8_RV64 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
- "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> {
- let imm12 = { 0b01101, 0b0111000 };
-}
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
+def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+ Sched<[WriteREV8, ReadREV8]>;
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
let Predicates = [HasStdExtZbbOrZbp] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
- "orc.b", "$rd, $rs1">, Sched<[WriteORCB, ReadORCB]> {
- let imm12 = { 0b00101, 0b0000111 };
-}
+def ORCB : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
+ Sched<[WriteORCB, ReadORCB]>;
} // Predicates = [HasStdExtZbbOrZbp]
+let Predicates = [HasStdExtZbpOrZbkb] in
+def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">;
+
+let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in {
+def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">;
+def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">;
+} // Predicates = [HasStdExtZbpOrZbkb, IsRV32]
+
+
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -614,11 +646,11 @@ def : InstAlias<"rev2.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00010)>;
def : InstAlias<"rev.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00011)>;
def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>;
def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>;
-def : InstAlias<"rev.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00111)>;
def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>;
def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>;
def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01110)>;
def : InstAlias<"rev.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01111)>;
+def : InstAlias<"rev.b $rd, $rs", (BREV8 GPR:$rd, GPR:$rs)>;
def : InstAlias<"zip.n $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0001)>;
def : InstAlias<"unzip.n $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>;
@@ -658,8 +690,7 @@ def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>;
def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>;
def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>;
def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>;
-def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1111)>;
-def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>;
+// zip and unzip are considered instructions rather than aliases.
def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>;
def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>;
@@ -741,6 +772,13 @@ def : InstAlias<"gorcw $rd, $rs1, $shamt",
(GORCIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>;
} // Predicates = [HasStdExtZbp, IsRV64]
+// Zbp is unratified and would likely adopt the already ratified Zbkx names.
+// Thus the current Zbp instructions are defined as aliases for the Zbkx ones.
+let Predicates = [HasStdExtZbp] in {
+ def : InstAlias<"xperm.b $rd, $rs1, $rs2", (XPERMB GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+ def : InstAlias<"xperm.n $rd, $rs1, $rs2", (XPERMN GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbp]
+
let Predicates = [HasStdExtZbs] in {
def : InstAlias<"bset $rd, $rs1, $shamt",
(BSETI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>;
@@ -756,16 +794,16 @@ def : InstAlias<"bext $rd, $rs1, $shamt",
// Codegen patterns
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def : PatGprGpr<rotl, ROL>;
def : PatGprGpr<rotr, ROR>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
let Predicates = [HasStdExtZbs] in {
def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1),
@@ -816,7 +854,7 @@ def : Pat<(and GPR:$r, BCLRIANDIMask:$i),
// There's no encoding for roli in the 'B' extension as it can be
// implemented with rori by negating the immediate.
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
def : PatGprImm<rotr, RORI, uimmlog2xlen>;
def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
(RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
@@ -834,19 +872,28 @@ def : PatGprGpr<riscv_unshfl, UNSHFL>;
def : PatGprGpr<int_riscv_xperm_n, XPERMN>;
def : PatGprGpr<int_riscv_xperm_b, XPERMB>;
def : PatGprGpr<int_riscv_xperm_h, XPERMH>;
-def : PatGprGpr<int_riscv_xperm_w, XPERMW>;
def : PatGprImm<riscv_shfl, SHFLI, shfl_uimm>;
def : PatGprImm<riscv_unshfl, UNSHFLI, shfl_uimm>;
def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>;
def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>;
+
+// We treat brev8 as a separate instruction, so match it directly.
+def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>;
} // Predicates = [HasStdExtZbp]
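The (riscv_grev GPR:$rs1, 7) -> BREV8 pattern above works because a generalized reverse with control value 7 enables only the 1-, 2- and 4-bit swap stages, which reverses the bits inside each byte without touching byte order. A host-side sketch of that computation (illustrative only):

// Bit-reverse within each byte: the three swap stages selected by grev
// control 7 (1-bit, 2-bit and 4-bit swaps) stay confined to byte lanes.
#include <cstdint>
#include <cstdio>

static uint64_t brev8(uint64_t x) {
  x = ((x & 0x5555555555555555ULL) << 1) | ((x >> 1) & 0x5555555555555555ULL);
  x = ((x & 0x3333333333333333ULL) << 2) | ((x >> 2) & 0x3333333333333333ULL);
  x = ((x & 0x0F0F0F0F0F0F0F0FULL) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL);
  return x;
}

int main() {
  // Each byte is bit-reversed in place; prints 0x8040c020a060e010.
  std::printf("0x%016llx\n", (unsigned long long)brev8(0x0102030405060708ULL));
  return 0;
}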
+let Predicates = [HasStdExtZbp, IsRV64] in
+def : PatGprGpr<int_riscv_xperm_w, XPERMW>;
+
let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(i32 (rotr (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>;
// We treat rev8 as a separate instruction, so match it directly.
def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>;
+
+// We treat zip and unzip as separate instructions, so match them directly.
+def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>;
+def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>;
} // Predicates = [HasStdExtZbp, IsRV32]
let Predicates = [HasStdExtZbp, IsRV64] in {
@@ -882,21 +929,16 @@ def : Pat<(select GPR:$rs2, GPR:$rs1, GPR:$rs3),
(CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
} // Predicates = [HasStdExtZbt]
-// fshl and fshr concatenate their operands in the same order. fsr and fsl
-// instruction use different orders. fshl will return its first operand for
-// shift of zero, fshr will return its second operand. fsl and fsr both return
-// $rs1 so the patterns need to have different operand orders.
let Predicates = [HasStdExtZbt] in {
def : Pat<(riscv_fsl GPR:$rs1, GPR:$rs3, GPR:$rs2),
(FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_fsr GPR:$rs3, GPR:$rs1, GPR:$rs2),
+def : Pat<(riscv_fsr GPR:$rs1, GPR:$rs3, GPR:$rs2),
(FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-
-def : Pat<(fshr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
+def : Pat<(riscv_fsr GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt),
(FSRI GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt)>;
-// We can use FSRI for fshl by immediate if we subtract the immediate from
+// We can use FSRI for FSL by immediate if we subtract the immediate from
// XLen and swap the operands.
-def : Pat<(fshl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
+def : Pat<(riscv_fsl GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
(FSRI GPR:$rs1, GPR:$rs3, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
} // Predicates = [HasStdExtZbt]
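The immediate FSL pattern above is encoded with FSRI via ImmSubFromXLen and swapped operands, which relies on the generic funnel-shift identity fshl(a, b, s) == fshr(a, b, XLEN - s) for 0 < s < XLEN. A quick host-side check of the identity (the fshl/fshr helpers below are local sketches, not the LLVM intrinsics):

// Funnel-shift identity check on 64-bit values, for shift amounts 1..63.
#include <cassert>
#include <cstdint>

static uint64_t fshl(uint64_t a, uint64_t b, unsigned s) {
  return (a << s) | (b >> (64 - s));   // assumes 0 < s < 64
}
static uint64_t fshr(uint64_t a, uint64_t b, unsigned s) {
  return (a << (64 - s)) | (b >> s);   // assumes 0 < s < 64
}

int main() {
  uint64_t a = 0x0123456789ABCDEFULL, b = 0xFEDCBA9876543210ULL;
  for (unsigned s = 1; s < 64; ++s)
    assert(fshl(a, b, s) == fshr(a, b, 64 - s));
  return 0;
}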
@@ -918,31 +960,38 @@ def : PatGprGpr<umin, MINU>;
def : PatGprGpr<umax, MAXU>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbb, IsRV32] in {
+let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in {
def : Pat<(i32 (bswap GPR:$rs1)), (REV8_RV32 GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb, IsRV32]
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV32]
-let Predicates = [HasStdExtZbb, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
def : Pat<(i64 (bswap GPR:$rs1)), (REV8_RV64 GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
-let Predicates = [HasStdExtZbp, IsRV32] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in
def : Pat<(i32 (or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16)))),
(PACK GPR:$rs1, GPR:$rs2)>;
+
+let Predicates = [HasStdExtZbp, IsRV32] in
def : Pat<(i32 (or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16)))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-}
-let Predicates = [HasStdExtZbp, IsRV64] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))),
(PACK GPR:$rs1, GPR:$rs2)>;
+
+let Predicates = [HasStdExtZbp, IsRV64] in
def : Pat<(i64 (or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32)))),
(PACKU GPR:$rs1, GPR:$rs2)>;
-}
-let Predicates = [HasStdExtZbp] in
+
+let Predicates = [HasStdExtZbpOrZbkb] in {
def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
(and GPR:$rs1, 0x00FF)),
(PACKH GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)),
+ (and GPR:$rs1, 0x00FF)),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbpOrZbkb]
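The second PACKH pattern added above covers an alternative DAG shape for the same value: ((rs2 << 8) & 0xFFFF) and ((rs2 & 0xFF) << 8) are identical, and either or'ed with (rs1 & 0xFF) yields the packh result. A brute-force host-side check (illustrative only):

// Both DAG shapes compute the same 16-bit pack of two byte operands.
#include <cassert>
#include <cstdint>

static uint64_t packh_form1(uint64_t rs1, uint64_t rs2) {
  return ((rs2 << 8) & 0xFFFF) | (rs1 & 0xFF);
}
static uint64_t packh_form2(uint64_t rs1, uint64_t rs2) {
  return ((rs2 & 0xFF) << 8) | (rs1 & 0xFF);
}

int main() {
  // Extra high bits are or'ed in to confirm the masking behaves the same.
  for (uint64_t i = 0; i < 256; ++i)
    for (uint64_t j = 0; j < 256; ++j)
      assert(packh_form1(i | 0xA500, j | 0x5A00) ==
             packh_form2(i | 0xA500, j | 0x5A00));
  return 0;
}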
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV32 GPR:$rs)>;
@@ -1045,13 +1094,13 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2))
(SH3ADDUW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZba, IsRV64]
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
def : PatGprGpr<riscv_rolw, ROLW>;
def : PatGprGpr<riscv_rorw, RORW>;
def : PatGprImm<riscv_rorw, RORIW, uimm5>;
def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
(RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
@@ -1067,10 +1116,12 @@ def : PatGprImm<riscv_gorcw, GORCIW, uimm5>;
let Predicates = [HasStdExtZbt, IsRV64] in {
def : Pat<(riscv_fslw GPR:$rs1, GPR:$rs3, GPR:$rs2),
(FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, GPR:$rs2),
+def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, GPR:$rs2),
(FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
-def : Pat<(riscv_fsrw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
+def : Pat<(riscv_fsrw GPR:$rs1, GPR:$rs3, uimm5:$shamt),
(FSRIW GPR:$rs1, GPR:$rs3, uimm5:$shamt)>;
+// We can use FSRIW for FSLW by immediate if we subtract the immediate from
+// 32 and swap the operands.
def : Pat<(riscv_fslw GPR:$rs3, GPR:$rs1, uimm5:$shamt),
(FSRIW GPR:$rs1, GPR:$rs3, (ImmSubFrom32 uimm5:$shamt))>;
} // Predicates = [HasStdExtZbt, IsRV64]
@@ -1081,7 +1132,7 @@ def : PatGpr<riscv_ctzw, CTZW>;
def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbp, IsRV64] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in {
def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
(and GPR:$rs1, 0x000000000000FFFF)),
i32)),
@@ -1089,16 +1140,21 @@ def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
(and GPR:$rs1, 0x000000000000FFFF))),
(PACKW GPR:$rs1, GPR:$rs2)>;
+}
+
+let Predicates = [HasStdExtZbp, IsRV64] in
def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
(srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
(PACKUW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-let Predicates = [HasStdExtZbc] in {
+
+let Predicates = [HasStdExtZbcOrZbkc] in {
def : PatGprGpr<int_riscv_clmul, CLMUL>;
def : PatGprGpr<int_riscv_clmulh, CLMULH>;
+} // Predicates = [HasStdExtZbcOrZbkc]
+
+let Predicates = [HasStdExtZbc] in
def : PatGprGpr<int_riscv_clmulr, CLMULR>;
-} // Predicates = [HasStdExtZbc]
let Predicates = [HasStdExtZbe] in {
def : PatGprGpr<riscv_bcompress, BCOMPRESS>;
@@ -1123,3 +1179,23 @@ let Predicates = [HasStdExtZbr, IsRV64] in {
def : PatGpr<int_riscv_crc32_d, CRC32D>;
def : PatGpr<int_riscv_crc32c_d, CRC32CD>;
} // Predicates = [HasStdExtZbr, IsRV64]
+
+let Predicates = [HasStdExtZbf] in
+def : PatGprGpr<riscv_bfp, BFP>;
+
+let Predicates = [HasStdExtZbf, IsRV64] in
+def : PatGprGpr<riscv_bfpw, BFPW>;
+
+let Predicates = [HasStdExtZbkb] in {
+def : PatGpr<int_riscv_brev8, BREV8>;
+} // Predicates = [HasStdExtZbkb]
+
+let Predicates = [HasStdExtZbkb, IsRV32] in {
+def : PatGpr<int_riscv_zip, ZIP_RV32>;
+def : PatGpr<int_riscv_unzip, UNZIP_RV32>;
+} // Predicates = [HasStdExtZbkb, IsRV32]
+
+let Predicates = [HasStdExtZbkx] in {
+def : PatGprGpr<int_riscv_xperm4, XPERMN>;
+def : PatGprGpr<int_riscv_xperm8, XPERMB>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 663e44813899..dfd0c74ee26c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -1,4 +1,4 @@
-//===-- RISCVInstrInfoFH.td - RISC-V 'FH' instructions -----*- tablegen -*-===//
+//===-- RISCVInstrInfoZfh.td - RISC-V 'Zfh' instructions ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file describes the RISC-V instructions from the standard 'Zfh'
-// half-precision floating-point extension, version 0.1.
-// This version is still experimental as the 'Zfh' extension hasn't been
-// ratified yet.
+// half-precision floating-point extension, version 1.0.
//
//===----------------------------------------------------------------------===//
@@ -32,20 +30,12 @@ def riscv_fmv_x_anyexth
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZfhmin] in {
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd),
- (ins GPR:$rs1, simm12:$imm12),
- "flh", "$rd, ${imm12}(${rs1})">,
- Sched<[WriteFLD16, ReadFMemBase]>;
+def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
// encoding.
-let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-def FSH : RVInstS<0b001, OPC_STORE_FP, (outs),
- (ins FPR16:$rs2, GPR:$rs1, simm12:$imm12),
- "fsh", "$rs2, ${imm12}(${rs1})">,
- Sched<[WriteFST16, ReadStoreData, ReadFMemBase]>;
+def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
} // Predicates = [HasStdExtZfhmin]
let Predicates = [HasStdExtZfh] in {
@@ -190,6 +180,10 @@ def : InstAlias<"fge.h $rd, $rs, $rt",
let Predicates = [HasStdExtZfhmin] in {
def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
def PseudoFSH : PseudoStore<"fsh", FPR16>;
+let usesCustomInserter = 1 in {
+def PseudoQuietFLE_H : PseudoQuietFCMP<FPR16>;
+def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>;
+}
} // Predicates = [HasStdExtZfhmin]
//===----------------------------------------------------------------------===//
@@ -207,6 +201,7 @@ let Predicates = [HasStdExtZfh] in {
/// Float constants
def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>;
+def : Pat<(f16 (fpimmneg0)), (FSGNJN_H (FMV_H_X X0), (FMV_H_X X0))>;
/// Float conversion operations
@@ -254,13 +249,34 @@ def : PatFpr16Fpr16<fminnum, FMIN_H>;
def : PatFpr16Fpr16<fmaxnum, FMAX_H>;
/// Setcc
-
-def : PatFpr16Fpr16<seteq, FEQ_H>;
-def : PatFpr16Fpr16<setoeq, FEQ_H>;
-def : PatFpr16Fpr16<setlt, FLT_H>;
-def : PatFpr16Fpr16<setolt, FLT_H>;
-def : PatFpr16Fpr16<setle, FLE_H>;
-def : PatFpr16Fpr16<setole, FLE_H>;
+// FIXME: SETEQ/SETLT/SETLE imply nonans; can we pick better instructions for
+// the strict versions of those?
+
+// Match non-signaling FEQ_H
+def : PatSetCC<FPR16, any_fsetcc, SETEQ, FEQ_H>;
+def : PatSetCC<FPR16, any_fsetcc, SETOEQ, FEQ_H>;
+def : PatSetCC<FPR16, strict_fsetcc, SETLT, PseudoQuietFLT_H>;
+def : PatSetCC<FPR16, strict_fsetcc, SETOLT, PseudoQuietFLT_H>;
+def : PatSetCC<FPR16, strict_fsetcc, SETLE, PseudoQuietFLE_H>;
+def : PatSetCC<FPR16, strict_fsetcc, SETOLE, PseudoQuietFLE_H>;
+
+// Match signaling FEQ_H
+def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs2, SETEQ),
+ (AND (FLE_H $rs1, $rs2),
+ (FLE_H $rs2, $rs1))>;
+def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs2, SETOEQ),
+ (AND (FLE_H $rs1, $rs2),
+ (FLE_H $rs2, $rs1))>;
+// If both operands are the same, use a single FLE.
+def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs1, SETEQ),
+ (FLE_H $rs1, $rs1)>;
+def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs1, SETOEQ),
+ (FLE_H $rs1, $rs1)>;
+
+def : PatSetCC<FPR16, any_fsetccs, SETLT, FLT_H>;
+def : PatSetCC<FPR16, any_fsetccs, SETOLT, FLT_H>;
+def : PatSetCC<FPR16, any_fsetccs, SETLE, FLE_H>;
+def : PatSetCC<FPR16, any_fsetccs, SETOLE, FLE_H>;
def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
} // Predicates = [HasStdExtZfh]
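The strict_fsetccs SETEQ patterns above build a signaling equality out of two FLE_H results AND'ed together: the value agrees with feq for non-NaN inputs, while each ordered <= raises the Invalid flag when a NaN is involved. A host-side sketch of the value equivalence (exceptions are not modelled here):

// (a <= b) && (b <= a) matches a == b for non-NaN inputs and is false when
// either operand is NaN, which is also what feq returns for unordered input.
#include <cassert>
#include <cmath>

static bool seq_via_fle(double a, double b) { return (a <= b) && (b <= a); }

int main() {
  const double vals[] = {-2.5, -0.0, 0.0, 1.0, 3.5, INFINITY};
  for (double a : vals)
    for (double b : vals)
      assert(seq_via_fle(a, b) == (a == b));
  assert(!seq_via_fle(std::nan(""), 1.0));
  return 0;
}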
@@ -291,14 +307,14 @@ def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
// Saturating float->[u]int32.
-def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(i32 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_x FPR16:$rs1, timm:$frm)), (FCVT_W_H $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_xu FPR16:$rs1, timm:$frm)), (FCVT_WU_H $rs1, timm:$frm)>;
// half->int32 with current rounding mode.
-def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>;
+def : Pat<(i32 (any_lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>;
// half->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>;
+def : Pat<(i32 (any_lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>;
@@ -309,24 +325,24 @@ let Predicates = [HasStdExtZfh, IsRV64] in {
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_w_rv64 FPR16:$rs1, timm:$frm), (FCVT_W_H $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_wu_rv64 FPR16:$rs1, timm:$frm), (FCVT_WU_H $rs1, timm:$frm)>;
// half->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
// Saturating float->[u]int64.
-def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
-def : Pat<(i64 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_x FPR16:$rs1, timm:$frm)), (FCVT_L_H $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_xu FPR16:$rs1, timm:$frm)), (FCVT_LU_H $rs1, timm:$frm)>;
// half->int64 with current rounding mode.
-def : Pat<(i64 (lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
-def : Pat<(i64 (llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
+def : Pat<(i64 (any_lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
+def : Pat<(i64 (any_llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
// half->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
-def : Pat<(i64 (llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
+def : Pat<(i64 (any_lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
+def : Pat<(i64 (any_llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>;
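The rounding-mode choices above follow the C semantics of the libm calls: lrint/llrint honour the current rounding mode (hence frm=0b111, DYN), while lround/llround always round ties away from zero (hence frm=0b100, RMM). A small host-side illustration of the difference:

// lrint tracks the dynamic rounding mode; lround ignores it and rounds
// ties away from zero.
#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  std::fesetround(FE_TONEAREST);      // ties-to-even
  std::printf("lrint(2.5)  = %ld\n", std::lrint(2.5));   // 2 (ties to even)
  std::printf("lround(2.5) = %ld\n", std::lround(2.5));  // 3 (ties away)
  std::fesetround(FE_DOWNWARD);
  std::printf("lrint(2.5)  = %ld\n", std::lrint(2.5));   // 2 (rounds down)
  std::printf("lround(2.5) = %ld\n", std::lround(2.5));  // 3 (mode-independent)
  return 0;
}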
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
new file mode 100644
index 000000000000..4a41cddedc71
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -0,0 +1,203 @@
+//===- RISCVInstrInfoZk.td - RISC-V Scalar Crypto instructions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zk' Scalar
+// Cryptography Instructions extension, version 1.0.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def RnumArg : AsmOperandClass {
+ let Name = "RnumArg";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidRnumArg";
+}
+
+def rnum : Operand<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> {
+ let ParserMatchClass = RnumArg;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<4>";
+ let OperandType = "OPERAND_RVKRNUM";
+ let OperandNamespace = "RISCVOp";
+}
+
+def byteselect : Operand<i8>, TImmLeaf<i8, [{return isUInt<2>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<2>;
+ let DecoderMethod = "decodeUImmOperand<2>";
+ let OperandType = "OPERAND_UIMM2";
+ let OperandNamespace = "RISCVOp";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVKUnary<bits<12> imm12_in, bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1">{
+ let imm12 = imm12_in;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVKByteSelect<bits<5> funct5, string opcodestr>
+ : RVInstR<{0b00, funct5}, 0b000, OPC_OP, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, byteselect:$bs),
+ opcodestr, "$rd, $rs1, $rs2, $bs">{
+ bits<2> bs;
+ let Inst{31-30} = bs;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVKUnary_rnum<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, rnum:$rnum),
+ opcodestr, "$rd, $rs1, $rnum">{
+ bits<4> rnum;
+ let Inst{31-25} = funct7;
+ let Inst{24} = 1;
+ let Inst{23-20} = rnum;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtZknd, IsRV32] in {
+def AES32DSI : RVKByteSelect<0b10101, "aes32dsi">;
+def AES32DSMI : RVKByteSelect<0b10111, "aes32dsmi">;
+} // Predicates = [HasStdExtZknd, IsRV32]
+
+let Predicates = [HasStdExtZknd, IsRV64] in {
+def AES64DS : ALU_rr<0b0011101, 0b000, "aes64ds">;
+def AES64DSM : ALU_rr<0b0011111, 0b000, "aes64dsm">;
+
+def AES64IM : RVKUnary<0b001100000000, 0b001, "aes64im">;
+} // Predicates = [HasStdExtZknd, IsRV64]
+
+let Predicates = [HasStdExtZkndOrZkne, IsRV64] in {
+def AES64KS2 : ALU_rr<0b0111111, 0b000, "aes64ks2">;
+
+def AES64KS1I : RVKUnary_rnum<0b0011000, 0b001, "aes64ks1i">;
+} // Predicates = [HasStdExtZkndOrZkne, IsRV64]
+
+let Predicates = [HasStdExtZkne, IsRV32] in {
+def AES32ESI : RVKByteSelect<0b10001, "aes32esi">;
+def AES32ESMI : RVKByteSelect<0b10011, "aes32esmi">;
+} // Predicates = [HasStdExtZkne, IsRV32]
+
+let Predicates = [HasStdExtZkne, IsRV64] in {
+def AES64ES : ALU_rr<0b0011001, 0b000, "aes64es">;
+def AES64ESM : ALU_rr<0b0011011, 0b000, "aes64esm">;
+} // Predicates = [HasStdExtZkne, IsRV64]
+
+let Predicates = [HasStdExtZknh] in {
+def SHA256SIG0 : RVKUnary<0b000100000010, 0b001, "sha256sig0">;
+def SHA256SIG1 : RVKUnary<0b000100000011, 0b001, "sha256sig1">;
+def SHA256SUM0 : RVKUnary<0b000100000000, 0b001, "sha256sum0">;
+def SHA256SUM1 : RVKUnary<0b000100000001, 0b001, "sha256sum1">;
+} // Predicates = [HasStdExtZknh]
+
+let Predicates = [HasStdExtZknh, IsRV32] in {
+def SHA512SIG0H : ALU_rr<0b0101110, 0b000, "sha512sig0h">;
+def SHA512SIG0L : ALU_rr<0b0101010, 0b000, "sha512sig0l">;
+def SHA512SIG1H : ALU_rr<0b0101111, 0b000, "sha512sig1h">;
+def SHA512SIG1L : ALU_rr<0b0101011, 0b000, "sha512sig1l">;
+def SHA512SUM0R : ALU_rr<0b0101000, 0b000, "sha512sum0r">;
+def SHA512SUM1R : ALU_rr<0b0101001, 0b000, "sha512sum1r">;
+} // Predicates = [HasStdExtZknh, IsRV32]
+
+let Predicates = [HasStdExtZknh, IsRV64] in {
+def SHA512SIG0 : RVKUnary<0b000100000110, 0b001, "sha512sig0">;
+def SHA512SIG1 : RVKUnary<0b000100000111, 0b001, "sha512sig1">;
+def SHA512SUM0 : RVKUnary<0b000100000100, 0b001, "sha512sum0">;
+def SHA512SUM1 : RVKUnary<0b000100000101, 0b001, "sha512sum1">;
+} // Predicates = [HasStdExtZknh, IsRV64]
+
+let Predicates = [HasStdExtZksed] in {
+def SM4ED : RVKByteSelect<0b11000, "sm4ed">;
+def SM4KS : RVKByteSelect<0b11010, "sm4ks">;
+} // Predicates = [HasStdExtZksed]
+
+let Predicates = [HasStdExtZksh] in {
+def SM3P0 : RVKUnary<0b000100001000, 0b001, "sm3p0">;
+def SM3P1 : RVKUnary<0b000100001001, 0b001, "sm3p1">;
+} // Predicates = [HasStdExtZksh]
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+class PatGprGprByteSelect<SDPatternOperator OpNode, RVInst Inst>
+ : Pat<(OpNode GPR:$rs1, GPR:$rs2, i8:$imm),
+ (Inst GPR:$rs1, GPR:$rs2, byteselect:$imm)>;
+
+// Zknd
+let Predicates = [HasStdExtZknd, IsRV32] in {
+def : PatGprGprByteSelect<int_riscv_aes32dsi, AES32DSI>;
+def : PatGprGprByteSelect<int_riscv_aes32dsmi, AES32DSMI>;
+} // Predicates = [HasStdExtZknd, IsRV32]
+
+let Predicates = [HasStdExtZknd, IsRV64] in {
+def : PatGprGpr<int_riscv_aes64ds, AES64DS>;
+def : PatGprGpr<int_riscv_aes64dsm, AES64DSM>;
+def : PatGpr<int_riscv_aes64im, AES64IM>;
+} // Predicates = [HasStdExtZknd, IsRV64]
+
+let Predicates = [HasStdExtZkndOrZkne, IsRV64] in {
+def : PatGprGpr<int_riscv_aes64ks2, AES64KS2>;
+def : Pat<(int_riscv_aes64ks1i GPR:$rs1, i32:$rnum),
+ (AES64KS1I GPR:$rs1, rnum:$rnum)>;
+} // Predicates = [HasStdExtZkndOrZkne, IsRV64]
+
+// Zkne
+let Predicates = [HasStdExtZkne, IsRV32] in {
+def : PatGprGprByteSelect<int_riscv_aes32esi, AES32ESI>;
+def : PatGprGprByteSelect<int_riscv_aes32esmi, AES32ESMI>;
+} // Predicates = [HasStdExtZkne, IsRV32]
+
+let Predicates = [HasStdExtZkne, IsRV64] in {
+def : PatGprGpr<int_riscv_aes64es, AES64ES>;
+def : PatGprGpr<int_riscv_aes64esm, AES64ESM>;
+} // Predicates = [HasStdExtZkne, IsRV64]
+
+// Zknh
+let Predicates = [HasStdExtZknh] in {
+def : PatGpr<int_riscv_sha256sig0, SHA256SIG0>;
+def : PatGpr<int_riscv_sha256sig1, SHA256SIG1>;
+def : PatGpr<int_riscv_sha256sum0, SHA256SUM0>;
+def : PatGpr<int_riscv_sha256sum1, SHA256SUM1>;
+} // Predicates = [HasStdExtZknh]
+
+let Predicates = [HasStdExtZknh, IsRV32] in {
+def : PatGprGpr<int_riscv_sha512sig0l, SHA512SIG0L>;
+def : PatGprGpr<int_riscv_sha512sig0h, SHA512SIG0H>;
+def : PatGprGpr<int_riscv_sha512sig1l, SHA512SIG1L>;
+def : PatGprGpr<int_riscv_sha512sig1h, SHA512SIG1H>;
+def : PatGprGpr<int_riscv_sha512sum0r, SHA512SUM0R>;
+def : PatGprGpr<int_riscv_sha512sum1r, SHA512SUM1R>;
+} // Predicates = [HasStdExtZknh, IsRV32]
+
+let Predicates = [HasStdExtZknh, IsRV64] in {
+def : PatGpr<int_riscv_sha512sig0, SHA512SIG0>;
+def : PatGpr<int_riscv_sha512sig1, SHA512SIG1>;
+def : PatGpr<int_riscv_sha512sum0, SHA512SUM0>;
+def : PatGpr<int_riscv_sha512sum1, SHA512SUM1>;
+} // Predicates = [HasStdExtZknh, IsRV64]
+
+// Zksed
+let Predicates = [HasStdExtZksed] in {
+def : PatGprGprByteSelect<int_riscv_sm4ks, SM4KS>;
+def : PatGprGprByteSelect<int_riscv_sm4ed, SM4ED>;
+} // Predicates = [HasStdExtZksed]
+
+// Zksh
+let Predicates = [HasStdExtZksh] in {
+def : PatGpr<int_riscv_sm3p0, SM3P0>;
+def : PatGpr<int_riscv_sm3p1, SM3P1>;
+} // Predicates = [HasStdExtZksh]
diff --git a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp
index 4d1f47da209d..8dfd71ac0b6b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp
@@ -69,8 +69,7 @@ private:
RISCVInstructionSelector::RISCVInstructionSelector(
const RISCVTargetMachine &TM, const RISCVSubtarget &STI,
const RISCVRegisterBankInfo &RBI)
- : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI),
+ : STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "RISCVGenGlobalISel.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index dd084f53e511..c167c095521a 100644
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -172,7 +172,7 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
default:
llvm_unreachable("Unknown operand type");
case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (RISCV::VRM2RegClass.contains(Reg) ||
RISCV::VRM4RegClass.contains(Reg) ||
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 9094dff1dda1..35363bf37c0d 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -347,3 +347,8 @@ void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset,
Ops.push_back(dwarf::DW_OP_minus);
}
}
+
+unsigned
+RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const {
+ return MF.getSubtarget<RISCVSubtarget>().hasStdExtC() ? 1 : 0;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 2b2bbdfbdf32..9e0ef7902210 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -66,6 +66,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
void getOffsetOpcodes(const StackOffset &Offset,
SmallVectorImpl<uint64_t> &Ops) const override;
+
+ unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override;
};
}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 20903b317180..8c1c03b51c24 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -73,12 +73,11 @@ def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>;
// are not part of GPRC, the most restrictive register class used by the
// compressed instruction set. This will influence the greedy register
// allocator to reduce the use of registers that can't be encoded in 16 bit
-// instructions. This affects register allocation even when compressed
-// instruction isn't targeted, we see no major negative codegen impact.
+// instructions.
let RegAltNameIndices = [ABIRegAltName] in {
def X0 : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>;
- let CostPerUse = [1] in {
+ let CostPerUse = [0, 1] in {
def X1 : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>;
def X2 : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>;
def X3 : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>;
@@ -95,7 +94,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>;
def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>;
def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>;
- let CostPerUse = [1] in {
+ let CostPerUse = [0, 1] in {
def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>;
def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>;
def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>;
@@ -138,27 +137,11 @@ def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
let RegInfos = XLenRI;
}
-// The order of registers represents the preferred allocation sequence.
-// Registers are listed in the order caller-save, callee-save, specials.
-def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 10, 17),
- (sequence "X%u", 5, 7),
- (sequence "X%u", 28, 31),
- (sequence "X%u", 8, 9),
- (sequence "X%u", 18, 27),
- (sequence "X%u", 1, 4)
- )> {
+def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
let RegInfos = XLenRI;
}
-def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 10, 17),
- (sequence "X%u", 5, 7),
- (sequence "X%u", 28, 31),
- (sequence "X%u", 8, 9),
- (sequence "X%u", 18, 27),
- X1, X3, X4
- )> {
+def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
let RegInfos = XLenRI;
}
@@ -166,13 +149,7 @@ def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
// X3, and X4 as it reduces the number of register classes that get synthesized
// by tablegen.
-def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 10, 17),
- (sequence "X%u", 6, 7),
- (sequence "X%u", 28, 31),
- (sequence "X%u", 8, 9),
- (sequence "X%u", 18, 27)
- )> {
+def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
let RegInfos = XLenRI;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
new file mode 100644
index 000000000000..12ec52925798
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -0,0 +1,278 @@
+//===-------------- RISCVSExtWRemoval.cpp - MI sext.w Removal -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass removes unneeded sext.w instructions at the MI level.
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-sextw-removal"
+
+STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
+
+static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
+ cl::desc("Disable removal of sext.w"),
+ cl::init(false), cl::Hidden);
+namespace {
+
+class RISCVSExtWRemoval : public MachineFunctionPass {
+public:
+ static char ID;
+
+ RISCVSExtWRemoval() : MachineFunctionPass(ID) {
+ initializeRISCVSExtWRemovalPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "RISCV sext.w Removal"; }
+};
+
+} // end anonymous namespace
+
+char RISCVSExtWRemoval::ID = 0;
+INITIALIZE_PASS(RISCVSExtWRemoval, DEBUG_TYPE, "RISCV sext.w Removal", false,
+ false)
+
+FunctionPass *llvm::createRISCVSExtWRemovalPass() {
+ return new RISCVSExtWRemoval();
+}
+
+// This function returns true if the machine instruction always outputs a value
+// where bits 63:32 match bit 31.
+// TODO: Allocate a bit in TSFlags for the W instructions?
+// TODO: Add other W instructions.
+static bool isSignExtendingOpW(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case RISCV::LUI:
+ case RISCV::LW:
+ case RISCV::ADDW:
+ case RISCV::ADDIW:
+ case RISCV::SUBW:
+ case RISCV::MULW:
+ case RISCV::SLLW:
+ case RISCV::SLLIW:
+ case RISCV::SRAW:
+ case RISCV::SRAIW:
+ case RISCV::SRLW:
+ case RISCV::SRLIW:
+ case RISCV::DIVW:
+ case RISCV::DIVUW:
+ case RISCV::REMW:
+ case RISCV::REMUW:
+ case RISCV::ROLW:
+ case RISCV::RORW:
+ case RISCV::RORIW:
+ case RISCV::CLZW:
+ case RISCV::CTZW:
+ case RISCV::CPOPW:
+ case RISCV::FCVT_W_H:
+ case RISCV::FCVT_WU_H:
+ case RISCV::FCVT_W_S:
+ case RISCV::FCVT_WU_S:
+ case RISCV::FCVT_W_D:
+ case RISCV::FCVT_WU_D:
+ // The following aren't W instructions, but are either sign extended from a
+ // smaller size or put zeros in bits 63:31.
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::SLT:
+ case RISCV::SLTI:
+ case RISCV::SLTU:
+ case RISCV::SLTIU:
+ case RISCV::SEXTB:
+ case RISCV::SEXTH:
+ case RISCV::ZEXTH_RV64:
+ return true;
+ // shifting right sufficiently makes the value 32-bit sign-extended
+ case RISCV::SRAI:
+ return MI.getOperand(2).getImm() >= 32;
+ case RISCV::SRLI:
+ return MI.getOperand(2).getImm() > 32;
+ // The LI pattern ADDI rd, X0, imm is sign extended.
+ case RISCV::ADDI:
+ return MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0;
+ // An ANDI with an 11 bit immediate will zero bits 63:11.
+ case RISCV::ANDI:
+ return isUInt<11>(MI.getOperand(2).getImm());
+ // An ORI with a negative 12-bit immediate (bit 11 set) will set bits 63:11.
+ case RISCV::ORI:
+ return !isUInt<11>(MI.getOperand(2).getImm());
+ // Copying from X0 produces zero.
+ case RISCV::COPY:
+ return MI.getOperand(1).getReg() == RISCV::X0;
+ }
+
+ return false;
+}
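The immediate-based cases above rest on simple bit arithmetic: SRAI with shamt >= 32 replicates the original sign bit through bits 63:31, SRLI with shamt > 32 leaves a value below 2^31, and ANDI with an 11-bit immediate clears everything above bit 10; in each case bits 63:32 end up matching bit 31. A host-side sketch of those claims (assumes the usual two's-complement arithmetic shift):

// A value is "sign extended from bit 31" when bits 63:32 all equal bit 31.
#include <cassert>
#include <cstdint>

static bool isSExt32(uint64_t v) {
  return v == (uint64_t)(int64_t)(int32_t)v;
}

int main() {
  uint64_t x = 0xDEADBEEFCAFEF00DULL;
  // SRAI with shamt >= 32 replicates the old bit 63 into bits 63:31.
  for (unsigned sh = 32; sh < 64; ++sh)
    assert(isSExt32((uint64_t)((int64_t)x >> sh)));
  // SRLI with shamt > 32 leaves a value below 2^31, so bit 31 is 0 as well.
  for (unsigned sh = 33; sh < 64; ++sh)
    assert(isSExt32(x >> sh));
  // ANDI with an 11-bit immediate clears everything above bit 10.
  assert(isSExt32(x & 0x7FF));
  return 0;
}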
+
+static bool isSignExtendedW(const MachineInstr &OrigMI,
+ MachineRegisterInfo &MRI) {
+
+ SmallPtrSet<const MachineInstr *, 4> Visited;
+ SmallVector<const MachineInstr *, 4> Worklist;
+
+ Worklist.push_back(&OrigMI);
+
+ while (!Worklist.empty()) {
+ const MachineInstr *MI = Worklist.pop_back_val();
+
+ // If we already visited this instruction, we don't need to check it again.
+ if (!Visited.insert(MI).second)
+ continue;
+
+ // If this is a sign extending operation we don't need to look any further.
+ if (isSignExtendingOpW(*MI))
+ continue;
+
+ // Is this an instruction that propagates sign extension?
+ switch (MI->getOpcode()) {
+ default:
+ // Unknown opcode, give up.
+ return false;
+ case RISCV::COPY: {
+ Register SrcReg = MI->getOperand(1).getReg();
+
+ // TODO: Handle arguments and returns from calls?
+
+ // If this is a copy from another register, check its source instruction.
+ if (!SrcReg.isVirtual())
+ return false;
+ const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ if (!SrcMI)
+ return false;
+
+ // Add SrcMI to the worklist.
+ Worklist.push_back(SrcMI);
+ break;
+ }
+ case RISCV::REM:
+ case RISCV::ANDI:
+ case RISCV::ORI:
+ case RISCV::XORI: {
+ // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
+ // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
+ // Logical operations use a sign extended 12-bit immediate. We just need
+ // to check if the other operand is sign extended.
+ Register SrcReg = MI->getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ return false;
+ const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ if (!SrcMI)
+ return false;
+
+ // Add SrcMI to the worklist.
+ Worklist.push_back(SrcMI);
+ break;
+ }
+ case RISCV::REMU:
+ case RISCV::AND:
+ case RISCV::OR:
+ case RISCV::XOR:
+ case RISCV::ANDN:
+ case RISCV::ORN:
+ case RISCV::XNOR:
+ case RISCV::MAX:
+ case RISCV::MAXU:
+ case RISCV::MIN:
+ case RISCV::MINU:
+ case RISCV::PHI: {
+ // If all incoming values are sign-extended, the output of AND, OR, XOR,
+ // MIN, MAX, or PHI is also sign-extended.
+
+ // The input registers for PHI are operand 1, 3, ...
+ // The input registers for others are operand 1 and 2.
+ unsigned E = 3, D = 1;
+ if (MI->getOpcode() == RISCV::PHI) {
+ E = MI->getNumOperands();
+ D = 2;
+ }
+
+ for (unsigned I = 1; I != E; I += D) {
+ if (!MI->getOperand(I).isReg())
+ return false;
+
+ Register SrcReg = MI->getOperand(I).getReg();
+ if (!SrcReg.isVirtual())
+ return false;
+ const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ if (!SrcMI)
+ return false;
+
+ // Add SrcMI to the worklist.
+ Worklist.push_back(SrcMI);
+ }
+
+ break;
+ }
+ }
+ }
+
+ // If we get here, then every node we visited produces a sign extended value
+ // or propagated sign extended values. So the result must be sign extended.
+ return true;
+}
+
+bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()) || DisableSExtWRemoval)
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+
+ if (!ST.is64Bit())
+ return false;
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) {
+ MachineInstr *MI = &*I++;
+
+ // We're looking for the sext.w pattern ADDIW rd, rs1, 0.
+ if (MI->getOpcode() != RISCV::ADDIW || !MI->getOperand(2).isImm() ||
+ MI->getOperand(2).getImm() != 0 || !MI->getOperand(1).isReg())
+ continue;
+
+ // Input should be a virtual register.
+ Register SrcReg = MI->getOperand(1).getReg();
+ if (!SrcReg.isVirtual())
+ continue;
+
+ const MachineInstr &SrcMI = *MRI.getVRegDef(SrcReg);
+ if (!isSignExtendedW(SrcMI, MRI))
+ continue;
+
+ Register DstReg = MI->getOperand(0).getReg();
+ if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
+ MRI.replaceRegWith(DstReg, SrcReg);
+ MRI.clearKillFlags(SrcReg);
+ MI->eraseFromParent();
+ ++NumRemovedSExtW;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index d5a0932c8778..78cf34c8c582 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,10 @@ def RocketModel : SchedMachineModel {
let LoadLatency = 3;
let MispredictPenalty = 3;
let CompleteModel = false;
- let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
+ let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+ HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
+ HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
+ HasVInstructions, HasVInstructionsI64];
}
//===----------------------------------------------------------------------===//
@@ -237,5 +240,8 @@ def : ReadAdvance<ReadFClass64, 0>;
defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
+defm : UnsupportedSchedZbc;
+defm : UnsupportedSchedZbs;
+defm : UnsupportedSchedZbf;
defm : UnsupportedSchedZfh;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 7f9d0aabc4ed..9f5e5ff1223c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,10 @@ def SiFive7Model : SchedMachineModel {
let LoadLatency = 3;
let MispredictPenalty = 3;
let CompleteModel = 0;
- let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
+ let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+ HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
+ HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
+ HasVInstructions];
}
// The SiFive7 microarchitecture has two pipelines: A and B.
@@ -224,5 +227,8 @@ def : ReadAdvance<ReadFClass64, 0>;
defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
+defm : UnsupportedSchedZbc;
+defm : UnsupportedSchedZbs;
+defm : UnsupportedSchedZbf;
defm : UnsupportedSchedZfh;
}
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleB.td b/llvm/lib/Target/RISCV/RISCVScheduleB.td
index b668b0acd719..193760e1e15b 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleB.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleB.td
@@ -26,6 +26,17 @@ def WriteCPOP32 : SchedWrite;
def WriteREV8 : SchedWrite;
def WriteORCB : SchedWrite;
+// Zbc extension
+def WriteCLMUL : SchedWrite; // CLMUL/CLMULR/CLMULH
+
+// Zbs extension
+def WriteSingleBit : SchedWrite; // BCLR/BSET/BINV/BEXT
+def WriteSingleBitImm: SchedWrite; // BCLRI/BSETI/BINVI/BEXTI
+
+// Zbf extension
+def WriteBFP : SchedWrite; // BFP
+def WriteBFP32 : SchedWrite; // BFPW
+
/// Define scheduler resources associated with use operands.
// Zba extension
@@ -46,6 +57,17 @@ def ReadCPOP32 : SchedRead;
def ReadREV8 : SchedRead;
def ReadORCB : SchedRead;
+// Zbc extension
+def ReadCLMUL : SchedRead; // CLMUL/CLMULR/CLMULH
+
+// Zbs extension
+def ReadSingleBit : SchedRead; // BCLR/BSET/BINV/BEXT
+def ReadSingleBitImm: SchedRead; // BCLRI/BSETI/BINVI/BEXTI
+
+// Zbf extension
+def ReadBFP : SchedRead; // BFP
+def ReadBFP32 : SchedRead; // BFPW
+
/// Define default scheduler resources for B.
multiclass UnsupportedSchedZba {
@@ -87,3 +109,31 @@ def : ReadAdvance<ReadREV8, 0>;
def : ReadAdvance<ReadORCB, 0>;
}
}
+
+multiclass UnsupportedSchedZbc {
+let Unsupported = true in {
+def : WriteRes<WriteCLMUL, []>;
+
+def : ReadAdvance<ReadCLMUL, 0>;
+}
+}
+
+multiclass UnsupportedSchedZbs {
+let Unsupported = true in {
+def : WriteRes<WriteSingleBit, []>;
+def : WriteRes<WriteSingleBitImm, []>;
+
+def : ReadAdvance<ReadSingleBit, 0>;
+def : ReadAdvance<ReadSingleBitImm, 0>;
+}
+}
+
+multiclass UnsupportedSchedZbf {
+let Unsupported = true in {
+def : WriteRes<WriteBFP, []>;
+def : WriteRes<WriteBFP32, []>;
+
+def : ReadAdvance<ReadBFP, 0>;
+def : ReadAdvance<ReadBFP32, 0>;
+}
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 1063134b8a6c..976e4ccb1422 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -18,6 +18,7 @@
#include "RISCVRegisterBankInfo.h"
#include "RISCVTargetMachine.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -50,6 +51,16 @@ static cl::opt<unsigned> RVVVectorELENMax(
cl::desc("The maximum ELEN value to use for fixed length vectors."),
cl::init(64), cl::Hidden);
+static cl::opt<bool> RISCVDisableUsingConstantPoolForLargeInts(
+ "riscv-disable-using-constant-pool-for-large-ints",
+ cl::desc("Disable using constant pool for large integers."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> RISCVMaxBuildIntsCost(
+ "riscv-max-build-ints-cost",
+ cl::desc("The maximum cost used for building integers."), cl::init(0),
+ cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -110,37 +121,69 @@ const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const {
return RegBankInfo.get();
}
+bool RISCVSubtarget::useConstantPoolForLargeInts() const {
+ return !RISCVDisableUsingConstantPoolForLargeInts;
+}
+
+unsigned RISCVSubtarget::getMaxBuildIntsCost() const {
+  // Loading an integer from the constant pool needs two instructions (hence
+  // the minimum cost of 2): an address-calculation instruction and a load.
+  // The address calculation and the instructions used to build integers
+  // (addi, slli, etc.) usually take one cycle each, so the default cost is
+  // set to (LoadLatency + 1) when no threshold is provided.
+ return RISCVMaxBuildIntsCost == 0
+ ? getSchedModel().LoadLatency + 1
+ : std::max<unsigned>(2, RISCVMaxBuildIntsCost);
+}
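A hedged aside on the threshold above: the selection logic amounts to the small standalone C++ sketch below; the helper name and the sample latencies are illustrative assumptions, not code from this patch.

#include <algorithm>

// Minimal sketch mirroring getMaxBuildIntsCost(): use LoadLatency + 1 by
// default, otherwise clamp the explicit override to the minimum cost of 2.
unsigned maxBuildIntsCost(unsigned LoadLatency, unsigned Override) {
  return Override == 0 ? LoadLatency + 1 : std::max(2u, Override);
}
// e.g. maxBuildIntsCost(3, 0) == 4 for a core with LoadLatency = 3, while
// maxBuildIntsCost(3, 1) == 2 because two instructions are always required.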
+
unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
assert(hasVInstructions() &&
"Tried to get vector length without Zve or V extension support!");
if (RVVVectorBitsMax == 0)
return 0;
- assert(RVVVectorBitsMax >= 128 && RVVVectorBitsMax <= 65536 &&
- isPowerOf2_32(RVVVectorBitsMax) &&
- "V extension requires vector length to be in the range of 128 to "
- "65536 and a power of 2!");
+
+ // ZvlLen specifies the minimum required vlen. The upper bound provided by
+ // riscv-v-vector-bits-max should be no less than it.
+ if (RVVVectorBitsMax < ZvlLen)
+ report_fatal_error("riscv-v-vector-bits-max specified is lower "
+ "than the Zvl*b limitation");
+
+ // FIXME: Change to >= 32 when VLEN = 32 is supported
+ assert(
+ RVVVectorBitsMax >= 64 && RVVVectorBitsMax <= 65536 &&
+ isPowerOf2_32(RVVVectorBitsMax) &&
+ "V or Zve* extension requires vector length to be in the range of 64 to "
+ "65536 and a power of 2!");
assert(RVVVectorBitsMax >= RVVVectorBitsMin &&
"Minimum V extension vector length should not be larger than its "
"maximum!");
unsigned Max = std::max(RVVVectorBitsMin, RVVVectorBitsMax);
- return PowerOf2Floor((Max < 128 || Max > 65536) ? 0 : Max);
+ return PowerOf2Floor((Max < 64 || Max > 65536) ? 0 : Max);
}
unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
+ // ZvlLen specifies the minimum required vlen. The lower bound provided by
+ // riscv-v-vector-bits-min should be no less than it.
+ if (RVVVectorBitsMin != 0 && RVVVectorBitsMin < ZvlLen)
+ report_fatal_error("riscv-v-vector-bits-min specified is lower "
+ "than the Zvl*b limitation");
+
assert(hasVInstructions() &&
"Tried to get vector length without Zve or V extension support!");
- assert((RVVVectorBitsMin == 0 ||
- (RVVVectorBitsMin >= 128 && RVVVectorBitsMax <= 65536 &&
- isPowerOf2_32(RVVVectorBitsMin))) &&
- "V extension requires vector length to be in the range of 128 to "
- "65536 and a power of 2!");
+ // FIXME: Change to >= 32 when VLEN = 32 is supported
+ assert(
+ (RVVVectorBitsMin == 0 ||
+ (RVVVectorBitsMin >= 64 && RVVVectorBitsMin <= 65536 &&
+ isPowerOf2_32(RVVVectorBitsMin))) &&
+ "V or Zve* extension requires vector length to be in the range of 64 to "
+ "65536 and a power of 2!");
assert((RVVVectorBitsMax >= RVVVectorBitsMin || RVVVectorBitsMax == 0) &&
"Minimum V extension vector length should not be larger than its "
"maximum!");
unsigned Min = RVVVectorBitsMin;
if (RVVVectorBitsMax != 0)
Min = std::min(RVVVectorBitsMin, RVVVectorBitsMax);
- return PowerOf2Floor((Min < 128 || Min > 65536) ? 0 : Min);
+ return PowerOf2Floor((Min < 64 || Min > 65536) ? 0 : Min);
}
unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
@@ -158,8 +201,9 @@ unsigned RISCVSubtarget::getMaxELENForFixedLengthVectors() const {
assert(RVVVectorELENMax <= 64 && RVVVectorELENMax >= 8 &&
isPowerOf2_32(RVVVectorELENMax) &&
"V extension requires a ELEN to be a power of 2 between 8 and 64!");
+ unsigned ELEN = hasVInstructionsI64() ? 64 : 32;
return PowerOf2Floor(
- std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, 64), 8));
+ std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, ELEN), 8));
}
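As a hedged illustration of the ELEN clamp introduced above (standalone C++; PowerOf2Floor is omitted because the inputs are asserted to be powers of two, and the numbers are only examples):

#include <algorithm>

// With only Zve32* (no 64-bit vector elements) the effective cap is 32, so the
// default riscv-v-vector-elen-max of 64 is lowered to 32; V and Zve64* keep 64.
unsigned maxELENForFixedLengthVectors(bool HasI64Elts, unsigned RequestedMax) {
  unsigned Cap = HasI64Elts ? 64 : 32;
  return std::max(std::min(RequestedMax, Cap), 8u);
}
// e.g. maxELENForFixedLengthVectors(false, 64) == 32 for Zve32x.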
bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index d0330e6984a5..044dda0a1ccc 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -33,7 +33,33 @@ namespace llvm {
class StringRef;
class RISCVSubtarget : public RISCVGenSubtargetInfo {
+public:
+ enum ExtZvl : unsigned {
+ NotSet = 0,
+ Zvl32b = 32,
+ Zvl64b = 64,
+ Zvl128b = 128,
+ Zvl256b = 256,
+ Zvl512b = 512,
+ Zvl1024b = 1024,
+ Zvl2048b = 2048,
+ Zvl4096b = 4096,
+ Zvl8192b = 8192,
+ Zvl16384b = 16384,
+ Zvl32768b = 32768,
+ Zvl65536b = 65536
+ };
+
+ enum RISCVProcFamilyEnum : uint8_t {
+ Others,
+ SiFive7,
+ };
+
+private:
virtual void anchor();
+
+ RISCVProcFamilyEnum RISCVProcFamily = Others;
+
bool HasStdExtM = false;
bool HasStdExtA = false;
bool HasStdExtF = false;
@@ -50,15 +76,33 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtZbs = false;
bool HasStdExtZbt = false;
bool HasStdExtV = false;
- bool HasStdExtZvlsseg = false;
+ bool HasStdExtZve32x = false;
+ bool HasStdExtZve32f = false;
+ bool HasStdExtZve64x = false;
+ bool HasStdExtZve64f = false;
+ bool HasStdExtZve64d = false;
bool HasStdExtZfhmin = false;
bool HasStdExtZfh = false;
+ bool HasStdExtZbkb = false;
+ bool HasStdExtZbkc = false;
+ bool HasStdExtZbkx = false;
+ bool HasStdExtZknd = false;
+ bool HasStdExtZkne = false;
+ bool HasStdExtZknh = false;
+ bool HasStdExtZksed = false;
+ bool HasStdExtZksh = false;
+ bool HasStdExtZkr = false;
+ bool HasStdExtZkn = false;
+ bool HasStdExtZks = false;
+ bool HasStdExtZkt = false;
+ bool HasStdExtZk = false;
bool HasRV64 = false;
bool IsRV32E = false;
bool EnableLinkerRelax = false;
bool EnableRVCHintInstrs = true;
bool EnableSaveRestore = false;
unsigned XLen = 32;
+ ExtZvl ZvlLen = ExtZvl::NotSet;
MVT XLenVT = MVT::i32;
uint8_t MaxInterleaveFactor = 2;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
@@ -100,11 +144,19 @@ public:
return &TSInfo;
}
bool enableMachineScheduler() const override { return true; }
+
+  /// Returns the RISC-V processor family.
+ /// Avoid this function! CPU specifics should be kept local to this class
+ /// and preferably modeled with SubtargetFeatures or properties in
+ /// initializeProperties().
+ RISCVProcFamilyEnum getProcFamily() const { return RISCVProcFamily; }
+
bool hasStdExtM() const { return HasStdExtM; }
bool hasStdExtA() const { return HasStdExtA; }
bool hasStdExtF() const { return HasStdExtF; }
bool hasStdExtD() const { return HasStdExtD; }
bool hasStdExtC() const { return HasStdExtC; }
+ bool hasStdExtV() const { return HasStdExtV; }
bool hasStdExtZba() const { return HasStdExtZba; }
bool hasStdExtZbb() const { return HasStdExtZbb; }
bool hasStdExtZbc() const { return HasStdExtZbc; }
@@ -115,10 +167,18 @@ public:
bool hasStdExtZbr() const { return HasStdExtZbr; }
bool hasStdExtZbs() const { return HasStdExtZbs; }
bool hasStdExtZbt() const { return HasStdExtZbt; }
- bool hasStdExtV() const { return HasStdExtV; }
- bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
+ bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
bool hasStdExtZfh() const { return HasStdExtZfh; }
+ bool hasStdExtZbkb() const { return HasStdExtZbkb; }
+ bool hasStdExtZbkc() const { return HasStdExtZbkc; }
+ bool hasStdExtZbkx() const { return HasStdExtZbkx; }
+ bool hasStdExtZknd() const { return HasStdExtZknd; }
+ bool hasStdExtZkne() const { return HasStdExtZkne; }
+ bool hasStdExtZknh() const { return HasStdExtZknh; }
+ bool hasStdExtZksed() const { return HasStdExtZksed; }
+ bool hasStdExtZksh() const { return HasStdExtZksh; }
+ bool hasStdExtZkr() const { return HasStdExtZkr; }
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
bool enableLinkerRelax() const { return EnableLinkerRelax; }
@@ -126,6 +186,15 @@ public:
bool enableSaveRestore() const { return EnableSaveRestore; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
+ unsigned getFLen() const {
+ if (HasStdExtD)
+ return 64;
+
+ if (HasStdExtF)
+ return 32;
+
+ return 0;
+ }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
bool isRegisterReservedByUser(Register i) const {
assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
@@ -133,11 +202,19 @@ public:
}
// Vector codegen related methods.
- bool hasVInstructions() const { return HasStdExtV; }
- bool hasVInstructionsI64() const { return HasStdExtV; }
- bool hasVInstructionsF16() const { return HasStdExtV && hasStdExtZfh(); }
- bool hasVInstructionsF32() const { return HasStdExtV && hasStdExtF(); }
- bool hasVInstructionsF64() const { return HasStdExtV && hasStdExtD(); }
+ bool hasVInstructions() const { return HasStdExtV || HasStdExtZve32x; }
+ bool hasVInstructionsI64() const { return HasStdExtV || HasStdExtZve64x; }
+ bool hasVInstructionsF16() const {
+ return (HasStdExtV || HasStdExtZve32f) && HasStdExtZfh;
+ }
+ // FIXME: Consider Zfinx in the future
+ bool hasVInstructionsF32() const {
+ return HasStdExtV || (HasStdExtZve32f && HasStdExtF);
+ }
+ // FIXME: Consider Zdinx in the future
+ bool hasVInstructionsF64() const {
+ return HasStdExtV || (HasStdExtZve64d && HasStdExtD);
+ }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
unsigned getMaxInterleaveFactor() const {
@@ -157,6 +234,12 @@ public:
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
+ bool useConstantPoolForLargeInts() const;
+
+  // Maximum cost used for building integers; if it is exceeded, the integer
+  // is put into the constant pool instead.
+ unsigned getMaxBuildIntsCost() const;
+
// Return the known range for the bit length of RVV data registers. A value
// of 0 means nothing is known about that particular limit beyond what's
// implied by the architecture.
diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index 5a4c579dd708..b9aa25b321b0 100644
--- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -1,4 +1,4 @@
-//===- RISCVSystemOperands.td ----------------------------*- tablegen -*-===//
+//===- RISCVSystemOperands.td ------------------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -70,16 +70,16 @@ def lookupSysRegByDeprecatedName : SearchIndex {
// 2.3, 2.4 and 2.5 in the RISC-V Instruction Set Manual
// Volume II: Privileged Architecture.
-//===--------------------------
+//===----------------------------------------------------------------------===//
// User Trap Setup
-//===--------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"ustatus", 0x000>;
def : SysReg<"uie", 0x004>;
def : SysReg<"utvec", 0x005>;
-//===--------------------------
+//===----------------------------------------------------------------------===//
// User Trap Handling
-//===--------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"uscratch", 0x040>;
def : SysReg<"uepc", 0x041>;
def : SysReg<"ucause", 0x042>;
@@ -87,100 +87,57 @@ let DeprecatedName = "ubadaddr" in
def : SysReg<"utval", 0x043>;
def : SysReg<"uip", 0x044>;
-//===--------------------------
+//===----------------------------------------------------------------------===//
// User Floating-Point CSRs
-//===--------------------------
+//===----------------------------------------------------------------------===//
def SysRegFFLAGS : SysReg<"fflags", 0x001>;
def SysRegFRM : SysReg<"frm", 0x002>;
def SysRegFCSR : SysReg<"fcsr", 0x003>;
-//===--------------------------
+//===----------------------------------------------------------------------===//
// User Counter/Timers
-//===--------------------------
+//===----------------------------------------------------------------------===//
def CYCLE : SysReg<"cycle", 0xC00>;
def TIME : SysReg<"time", 0xC01>;
def INSTRET : SysReg<"instret", 0xC02>;
-def : SysReg<"hpmcounter3", 0xC03>;
-def : SysReg<"hpmcounter4", 0xC04>;
-def : SysReg<"hpmcounter5", 0xC05>;
-def : SysReg<"hpmcounter6", 0xC06>;
-def : SysReg<"hpmcounter7", 0xC07>;
-def : SysReg<"hpmcounter8", 0xC08>;
-def : SysReg<"hpmcounter9", 0xC09>;
-def : SysReg<"hpmcounter10", 0xC0A>;
-def : SysReg<"hpmcounter11", 0xC0B>;
-def : SysReg<"hpmcounter12", 0xC0C>;
-def : SysReg<"hpmcounter13", 0xC0D>;
-def : SysReg<"hpmcounter14", 0xC0E>;
-def : SysReg<"hpmcounter15", 0xC0F>;
-def : SysReg<"hpmcounter16", 0xC10>;
-def : SysReg<"hpmcounter17", 0xC11>;
-def : SysReg<"hpmcounter18", 0xC12>;
-def : SysReg<"hpmcounter19", 0xC13>;
-def : SysReg<"hpmcounter20", 0xC14>;
-def : SysReg<"hpmcounter21", 0xC15>;
-def : SysReg<"hpmcounter22", 0xC16>;
-def : SysReg<"hpmcounter23", 0xC17>;
-def : SysReg<"hpmcounter24", 0xC18>;
-def : SysReg<"hpmcounter25", 0xC19>;
-def : SysReg<"hpmcounter26", 0xC1A>;
-def : SysReg<"hpmcounter27", 0xC1B>;
-def : SysReg<"hpmcounter28", 0xC1C>;
-def : SysReg<"hpmcounter29", 0xC1D>;
-def : SysReg<"hpmcounter30", 0xC1E>;
-def : SysReg<"hpmcounter31", 0xC1F>;
+// hpmcounter3-hpmcounter31 at 0xC03-0xC1F.
+foreach i = 3...31 in
+ def : SysReg<"hpmcounter"#i, !add(0xC03, !sub(i, 3))>;
let isRV32Only = 1 in {
def CYCLEH : SysReg<"cycleh", 0xC80>;
def TIMEH : SysReg<"timeh", 0xC81>;
def INSTRETH : SysReg<"instreth", 0xC82>;
-def: SysReg<"hpmcounter3h", 0xC83>;
-def: SysReg<"hpmcounter4h", 0xC84>;
-def: SysReg<"hpmcounter5h", 0xC85>;
-def: SysReg<"hpmcounter6h", 0xC86>;
-def: SysReg<"hpmcounter7h", 0xC87>;
-def: SysReg<"hpmcounter8h", 0xC88>;
-def: SysReg<"hpmcounter9h", 0xC89>;
-def: SysReg<"hpmcounter10h", 0xC8A>;
-def: SysReg<"hpmcounter11h", 0xC8B>;
-def: SysReg<"hpmcounter12h", 0xC8C>;
-def: SysReg<"hpmcounter13h", 0xC8D>;
-def: SysReg<"hpmcounter14h", 0xC8E>;
-def: SysReg<"hpmcounter15h", 0xC8F>;
-def: SysReg<"hpmcounter16h", 0xC90>;
-def: SysReg<"hpmcounter17h", 0xC91>;
-def: SysReg<"hpmcounter18h", 0xC92>;
-def: SysReg<"hpmcounter19h", 0xC93>;
-def: SysReg<"hpmcounter20h", 0xC94>;
-def: SysReg<"hpmcounter21h", 0xC95>;
-def: SysReg<"hpmcounter22h", 0xC96>;
-def: SysReg<"hpmcounter23h", 0xC97>;
-def: SysReg<"hpmcounter24h", 0xC98>;
-def: SysReg<"hpmcounter25h", 0xC99>;
-def: SysReg<"hpmcounter26h", 0xC9A>;
-def: SysReg<"hpmcounter27h", 0xC9B>;
-def: SysReg<"hpmcounter28h", 0xC9C>;
-def: SysReg<"hpmcounter29h", 0xC9D>;
-def: SysReg<"hpmcounter30h", 0xC9E>;
-def: SysReg<"hpmcounter31h", 0xC9F>;
+// hpmcounter3h-hpmcounter31h at 0xC83-0xC9F.
+foreach i = 3...31 in
+ def : SysReg<"hpmcounter"#i#"h", !add(0xC83, !sub(i, 3))>;
}
-//===--------------------------
+//===----------------------------------------------------------------------===//
// Supervisor Trap Setup
-//===--------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"sstatus", 0x100>;
def : SysReg<"sedeleg", 0x102>;
def : SysReg<"sideleg", 0x103>;
def : SysReg<"sie", 0x104>;
def : SysReg<"stvec", 0x105>;
def : SysReg<"scounteren", 0x106>;
+def : SysReg<"stimecmp", 0x14D>;
+let isRV32Only = 1 in
+def : SysReg<"stimecmph", 0x15D>;
+
+//===----------------------------------------------------------------------===//
+// Supervisor Configuration
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"senvcfg", 0x10A>;
-//===--------------------------
+//===----------------------------------------------------------------------===//
// Supervisor Trap Handling
-//===--------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"sscratch", 0x140>;
def : SysReg<"sepc", 0x141>;
def : SysReg<"scause", 0x142>;
@@ -188,24 +145,103 @@ let DeprecatedName = "sbadaddr" in
def : SysReg<"stval", 0x143>;
def : SysReg<"sip", 0x144>;
-//===-------------------------------------
+//===----------------------------------------------------------------------===//
// Supervisor Protection and Translation
-//===-------------------------------------
+//===----------------------------------------------------------------------===//
let DeprecatedName = "sptbr" in
def : SysReg<"satp", 0x180>;
-//===-----------------------------
+//===----------------------------------------------------------------------===//
+// Debug/Trace Registers
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"scontext", 0x5A8>;
+
+//===----------------------------------------------------------------------===//
+// Supervisor Count Overflow (defined in Sscofpmf)
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"scountovf", 0xDA0>;
+
+//===----------------------------------------------------------------------===//
+// Hypervisor Trap Setup
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"hstatus", 0x600>;
+def : SysReg<"hedeleg", 0x602>;
+def : SysReg<"hideleg", 0x603>;
+def : SysReg<"hie", 0x604>;
+def : SysReg<"hcounteren", 0x606>;
+def : SysReg<"hgeie", 0x607>;
+
+//===----------------------------------------------------------------------===//
+// Hypervisor Trap Handling
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"htval", 0x643>;
+def : SysReg<"hip", 0x644>;
+def : SysReg<"hvip", 0x645>;
+def : SysReg<"htinst", 0x64A>;
+def : SysReg<"hgeip", 0xE12>;
+
+//===----------------------------------------------------------------------===//
+// Hypervisor Configuration
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"henvcfg", 0x60A>;
+let isRV32Only = 1 in
+def : SysReg<"henvcfgh", 0x61A>;
+
+//===----------------------------------------------------------------------===//
+// Hypervisor Protection and Translation
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"hgatp", 0x680>;
+
+//===----------------------------------------------------------------------===//
+// Debug/Trace Registers
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"hcontext", 0x6A8>;
+
+//===----------------------------------------------------------------------===//
+// Hypervisor Counter/Timer Virtualization Registers
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"htimedelta", 0x605>;
+let isRV32Only = 1 in
+def : SysReg<"htimedeltah", 0x615>;
+
+//===----------------------------------------------------------------------===//
+// Virtual Supervisor Registers
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"vsstatus", 0x200>;
+def : SysReg<"vsie", 0x204>;
+def : SysReg<"vstvec", 0x205>;
+def : SysReg<"vsscratch", 0x240>;
+def : SysReg<"vsepc", 0x241>;
+def : SysReg<"vscause", 0x242>;
+def : SysReg<"vstval", 0x243>;
+def : SysReg<"vsip", 0x244>;
+def : SysReg<"vstimecmp", 0x24D>;
+let isRV32Only = 1 in
+def : SysReg<"vstimecmph", 0x25D>;
+def : SysReg<"vsatp", 0x280>;
+
+//===----------------------------------------------------------------------===//
// Machine Information Registers
-//===-----------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"mvendorid", 0xF11>;
def : SysReg<"marchid", 0xF12>;
def : SysReg<"mimpid", 0xF13>;
def : SysReg<"mhartid", 0xF14>;
+def : SysReg<"mconfigptr", 0xF15>;
-//===-----------------------------
+//===----------------------------------------------------------------------===//
// Machine Trap Setup
-//===-----------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"mstatus", 0x300>;
def : SysReg<"misa", 0x301>;
def : SysReg<"medeleg", 0x302>;
@@ -213,163 +249,93 @@ def : SysReg<"mideleg", 0x303>;
def : SysReg<"mie", 0x304>;
def : SysReg<"mtvec", 0x305>;
def : SysReg<"mcounteren", 0x306>;
+let isRV32Only = 1 in
+def : SysReg<"mstatush", 0x310>;
-//===-----------------------------
+//===----------------------------------------------------------------------===//
// Machine Trap Handling
-//===-----------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"mscratch", 0x340>;
def : SysReg<"mepc", 0x341>;
def : SysReg<"mcause", 0x342>;
let DeprecatedName = "mbadaddr" in
def : SysReg<"mtval", 0x343>;
def : SysReg<"mip", 0x344>;
+def : SysReg<"mtinst", 0x34A>;
+def : SysReg<"mtval2", 0x34B>;
-//===----------------------------------
+//===----------------------------------------------------------------------===//
+// Machine Configuration
+//===----------------------------------------------------------------------===//
+
+def : SysReg<"menvcfg", 0x30A>;
+let isRV32Only = 1 in
+def : SysReg<"menvcfgh", 0x31A>;
+def : SysReg<"mseccfg", 0x747>;
+let isRV32Only = 1 in
+def : SysReg<"mseccfgh", 0x757>;
+
+//===----------------------------------------------------------------------===//
// Machine Protection and Translation
-//===----------------------------------
-def : SysReg<"pmpcfg0", 0x3A0>;
-def : SysReg<"pmpcfg2", 0x3A2>;
-let isRV32Only = 1 in {
-def : SysReg<"pmpcfg1", 0x3A1>;
-def : SysReg<"pmpcfg3", 0x3A3>;
+//===----------------------------------------------------------------------===//
+
+// pmpcfg0-pmpcfg15 at 0x3A0-0x3AF. Odd-numbered registers are RV32-only.
+foreach i = 0...15 in {
+ let isRV32Only = !and(i, 1) in
+ def : SysReg<"pmpcfg"#i, !add(0x3A0, i)>;
}
-def : SysReg<"pmpaddr0", 0x3B0>;
-def : SysReg<"pmpaddr1", 0x3B1>;
-def : SysReg<"pmpaddr2", 0x3B2>;
-def : SysReg<"pmpaddr3", 0x3B3>;
-def : SysReg<"pmpaddr4", 0x3B4>;
-def : SysReg<"pmpaddr5", 0x3B5>;
-def : SysReg<"pmpaddr6", 0x3B6>;
-def : SysReg<"pmpaddr7", 0x3B7>;
-def : SysReg<"pmpaddr8", 0x3B8>;
-def : SysReg<"pmpaddr9", 0x3B9>;
-def : SysReg<"pmpaddr10", 0x3BA>;
-def : SysReg<"pmpaddr11", 0x3BB>;
-def : SysReg<"pmpaddr12", 0x3BC>;
-def : SysReg<"pmpaddr13", 0x3BD>;
-def : SysReg<"pmpaddr14", 0x3BE>;
-def : SysReg<"pmpaddr15", 0x3BF>;
-
-
-//===--------------------------
+// pmpaddr0-pmpaddr63 at 0x3B0-0x3EF.
+foreach i = 0...63 in
+ def : SysReg<"pmpaddr"#i, !add(0x3B0, i)>;
+
+//===----------------------------------------------------------------------===//
// Machine Counter and Timers
-//===--------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"mcycle", 0xB00>;
def : SysReg<"minstret", 0xB02>;
-def : SysReg<"mhpmcounter3", 0xB03>;
-def : SysReg<"mhpmcounter4", 0xB04>;
-def : SysReg<"mhpmcounter5", 0xB05>;
-def : SysReg<"mhpmcounter6", 0xB06>;
-def : SysReg<"mhpmcounter7", 0xB07>;
-def : SysReg<"mhpmcounter8", 0xB08>;
-def : SysReg<"mhpmcounter9", 0xB09>;
-def : SysReg<"mhpmcounter10", 0xB0A>;
-def : SysReg<"mhpmcounter11", 0xB0B>;
-def : SysReg<"mhpmcounter12", 0xB0C>;
-def : SysReg<"mhpmcounter13", 0xB0D>;
-def : SysReg<"mhpmcounter14", 0xB0E>;
-def : SysReg<"mhpmcounter15", 0xB0F>;
-def : SysReg<"mhpmcounter16", 0xB10>;
-def : SysReg<"mhpmcounter17", 0xB11>;
-def : SysReg<"mhpmcounter18", 0xB12>;
-def : SysReg<"mhpmcounter19", 0xB13>;
-def : SysReg<"mhpmcounter20", 0xB14>;
-def : SysReg<"mhpmcounter21", 0xB15>;
-def : SysReg<"mhpmcounter22", 0xB16>;
-def : SysReg<"mhpmcounter23", 0xB17>;
-def : SysReg<"mhpmcounter24", 0xB18>;
-def : SysReg<"mhpmcounter25", 0xB19>;
-def : SysReg<"mhpmcounter26", 0xB1A>;
-def : SysReg<"mhpmcounter27", 0xB1B>;
-def : SysReg<"mhpmcounter28", 0xB1C>;
-def : SysReg<"mhpmcounter29", 0xB1D>;
-def : SysReg<"mhpmcounter30", 0xB1E>;
-def : SysReg<"mhpmcounter31", 0xB1F>;
+// mhpmcounter3-mhpmcounter31 at 0xB03-0xB1F.
+foreach i = 3...31 in
+ def : SysReg<"mhpmcounter"#i, !add(0xB03, !sub(i, 3))>;
let isRV32Only = 1 in {
def: SysReg<"mcycleh", 0xB80>;
def: SysReg<"minstreth", 0xB82>;
-def: SysReg<"mhpmcounter3h", 0xB83>;
-def: SysReg<"mhpmcounter4h", 0xB84>;
-def: SysReg<"mhpmcounter5h", 0xB85>;
-def: SysReg<"mhpmcounter6h", 0xB86>;
-def: SysReg<"mhpmcounter7h", 0xB87>;
-def: SysReg<"mhpmcounter8h", 0xB88>;
-def: SysReg<"mhpmcounter9h", 0xB89>;
-def: SysReg<"mhpmcounter10h", 0xB8A>;
-def: SysReg<"mhpmcounter11h", 0xB8B>;
-def: SysReg<"mhpmcounter12h", 0xB8C>;
-def: SysReg<"mhpmcounter13h", 0xB8D>;
-def: SysReg<"mhpmcounter14h", 0xB8E>;
-def: SysReg<"mhpmcounter15h", 0xB8F>;
-def: SysReg<"mhpmcounter16h", 0xB90>;
-def: SysReg<"mhpmcounter17h", 0xB91>;
-def: SysReg<"mhpmcounter18h", 0xB92>;
-def: SysReg<"mhpmcounter19h", 0xB93>;
-def: SysReg<"mhpmcounter20h", 0xB94>;
-def: SysReg<"mhpmcounter21h", 0xB95>;
-def: SysReg<"mhpmcounter22h", 0xB96>;
-def: SysReg<"mhpmcounter23h", 0xB97>;
-def: SysReg<"mhpmcounter24h", 0xB98>;
-def: SysReg<"mhpmcounter25h", 0xB99>;
-def: SysReg<"mhpmcounter26h", 0xB9A>;
-def: SysReg<"mhpmcounter27h", 0xB9B>;
-def: SysReg<"mhpmcounter28h", 0xB9C>;
-def: SysReg<"mhpmcounter29h", 0xB9D>;
-def: SysReg<"mhpmcounter30h", 0xB9E>;
-def: SysReg<"mhpmcounter31h", 0xB9F>;
+// mhpmcounter3h-mhpmcounter31h at 0xB83-0xB9F.
+foreach i = 3...31 in
+ def : SysReg<"mhpmcounter"#i#"h", !add(0xB83, !sub(i, 3))>;
}
-//===--------------------------
+//===----------------------------------------------------------------------===//
// Machine Counter Setup
-//===--------------------------
+//===----------------------------------------------------------------------===//
let AltName = "mucounteren" in // Privileged spec v1.9.1 Name
def : SysReg<"mcountinhibit", 0x320>;
-def : SysReg<"mhpmevent3", 0x323>;
-def : SysReg<"mhpmevent4", 0x324>;
-def : SysReg<"mhpmevent5", 0x325>;
-def : SysReg<"mhpmevent6", 0x326>;
-def : SysReg<"mhpmevent7", 0x327>;
-def : SysReg<"mhpmevent8", 0x328>;
-def : SysReg<"mhpmevent9", 0x329>;
-def : SysReg<"mhpmevent10", 0x32A>;
-def : SysReg<"mhpmevent11", 0x32B>;
-def : SysReg<"mhpmevent12", 0x32C>;
-def : SysReg<"mhpmevent13", 0x32D>;
-def : SysReg<"mhpmevent14", 0x32E>;
-def : SysReg<"mhpmevent15", 0x32F>;
-def : SysReg<"mhpmevent16", 0x330>;
-def : SysReg<"mhpmevent17", 0x331>;
-def : SysReg<"mhpmevent18", 0x332>;
-def : SysReg<"mhpmevent19", 0x333>;
-def : SysReg<"mhpmevent20", 0x334>;
-def : SysReg<"mhpmevent21", 0x335>;
-def : SysReg<"mhpmevent22", 0x336>;
-def : SysReg<"mhpmevent23", 0x337>;
-def : SysReg<"mhpmevent24", 0x338>;
-def : SysReg<"mhpmevent25", 0x339>;
-def : SysReg<"mhpmevent26", 0x33A>;
-def : SysReg<"mhpmevent27", 0x33B>;
-def : SysReg<"mhpmevent28", 0x33C>;
-def : SysReg<"mhpmevent29", 0x33D>;
-def : SysReg<"mhpmevent30", 0x33E>;
-def : SysReg<"mhpmevent31", 0x33F>;
+// mhpmevent3-mhpmevent31 at 0x323-0x33F.
+foreach i = 3...31 in
+ def : SysReg<"mhpmevent"#i, !add(0x323, !sub(i, 3))>;
-//===-----------------------------------------------
+// mhpmevent3h-mhpmevent31h at 0x723-0x73F.
+foreach i = 3...31 in {
+ let isRV32Only = 1 in
+ def : SysReg<"mhpmevent"#i#"h", !add(0x723, !sub(i, 3))>;
+}
+
+//===----------------------------------------------------------------------===//
// Debug/Trace Registers (shared with Debug Mode)
-//===-----------------------------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"tselect", 0x7A0>;
def : SysReg<"tdata1", 0x7A1>;
def : SysReg<"tdata2", 0x7A2>;
def : SysReg<"tdata3", 0x7A3>;
+def : SysReg<"mcontext", 0x7A8>;
-//===-----------------------------------------------
+//===----------------------------------------------------------------------===//
// Debug Mode Registers
-//===-----------------------------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"dcsr", 0x7B0>;
def : SysReg<"dpc", 0x7B1>;
@@ -379,9 +345,9 @@ let AltName = "dscratch" in
def : SysReg<"dscratch0", 0x7B2>;
def : SysReg<"dscratch1", 0x7B3>;
-//===-----------------------------------------------
+//===----------------------------------------------------------------------===//
// User Vector CSRs
-//===-----------------------------------------------
+//===----------------------------------------------------------------------===//
def : SysReg<"vstart", 0x008>;
def : SysReg<"vxsat", 0x009>;
def : SysReg<"vxrm", 0x00A>;
@@ -389,3 +355,26 @@ def : SysReg<"vcsr", 0x00F>;
def : SysReg<"vl", 0xC20>;
def : SysReg<"vtype", 0xC21>;
def SysRegVLENB: SysReg<"vlenb", 0xC22>;
+
+//===----------------------------------------------------------------------===//
+// State Enable Extension (Smstateen)
+//===----------------------------------------------------------------------===//
+
+// sstateen0-sstateen3 at 0x10C-0x10F, mstateen0-mstateen3 at 0x30C-0x30F,
+// mstateen0h-mstateen3h at 0x31C-0x31F, hstateen0-hstateen3 at 0x60C-0x60F,
+// and hstateen0h-hstateen3h at 0x61C-0x61F.
+foreach i = 0...3 in {
+ def : SysReg<"sstateen"#i, !add(0x10C, i)>;
+ def : SysReg<"mstateen"#i, !add(0x30C, i)>;
+ let isRV32Only = 1 in
+ def : SysReg<"mstateen"#i#"h", !add(0x31C, i)>;
+ def : SysReg<"hstateen"#i, !add(0x60C, i)>;
+ let isRV32Only = 1 in
+ def : SysReg<"hstateen"#i#"h", !add(0x61C, i)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Entropy Source CSR
+//===----------------------------------------------------------------------===//
+
+def SEED : SysReg<"seed", 0x015>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b421eba8d442..db5e2f1eeb6f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -39,6 +39,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeGlobalISel(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
+ initializeRISCVSExtWRemovalPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
}
@@ -140,6 +141,7 @@ public:
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreSched2() override;
+ void addMachineSSAOptimization() override;
void addPreRegAlloc() override;
};
} // namespace
@@ -194,6 +196,13 @@ void RISCVPassConfig::addPreEmitPass2() {
addPass(createRISCVExpandAtomicPseudoPass());
}
+void RISCVPassConfig::addMachineSSAOptimization() {
+ TargetPassConfig::addMachineSSAOptimization();
+
+ if (TM->getTargetTriple().getArch() == Triple::riscv64)
+ addPass(createRISCVSExtWRemovalPass());
+}
+
void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createRISCVMergeBaseOffsetOptPass());
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index c435430a1288..99e6774a02e4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -15,6 +15,13 @@ using namespace llvm;
#define DEBUG_TYPE "riscvtti"
+static cl::opt<unsigned> RVVRegisterWidthLMUL(
+ "riscv-v-register-bit-width-lmul",
+ cl::desc(
+ "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
+ "by autovectorized code. Fractional LMULs are not supported."),
+ cl::init(1), cl::Hidden);
+
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy() &&
@@ -137,6 +144,24 @@ Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
return BaseT::getMaxVScale();
}
+TypeSize
+RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+ unsigned LMUL = PowerOf2Floor(
+ std::max<unsigned>(std::min<unsigned>(RVVRegisterWidthLMUL, 8), 1));
+ switch (K) {
+ case TargetTransformInfo::RGK_Scalar:
+ return TypeSize::getFixed(ST->getXLen());
+ case TargetTransformInfo::RGK_FixedWidthVector:
+ return TypeSize::getFixed(
+ ST->hasVInstructions() ? LMUL * ST->getMinRVVVectorSizeInBits() : 0);
+ case TargetTransformInfo::RGK_ScalableVector:
+ return TypeSize::getScalable(
+ ST->hasVInstructions() ? LMUL * RISCV::RVVBitsPerBlock : 0);
+ }
+
+ llvm_unreachable("Unsupported register kind");
+}
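A hedged note on the new riscv-v-register-bit-width-lmul option: the reported register widths scale linearly with the chosen LMUL, as in the standalone sketch below (RVVBitsPerBlock is assumed to be 64 and the VLEN value is illustrative, not taken from this patch).

// Minimal sketch of the scaling performed above.
unsigned fixedWidthBits(unsigned LMUL, unsigned MinVLen) { return LMUL * MinVLen; }
unsigned scalableKnownMinBits(unsigned LMUL) { return LMUL * 64; }
// e.g. with LMUL = 2 and a guaranteed minimum VLEN of 128, fixed-width vectors
// report 256 bits and scalable vectors report a known minimum of 128 bits.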
+
InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
@@ -172,10 +197,7 @@ void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// Support explicit targets enabled for SiFive with the unrolling preferences
// below
bool UseDefaultPreferences = true;
- if (ST->getTuneCPU().contains("sifive-e76") ||
- ST->getTuneCPU().contains("sifive-s76") ||
- ST->getTuneCPU().contains("sifive-u74") ||
- ST->getTuneCPU().contains("sifive-7"))
+ if (ST->getProcFamily() == RISCVSubtarget::SiFive7)
UseDefaultPreferences = false;
if (UseDefaultPreferences)
@@ -253,3 +275,16 @@ void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
+
+InstructionCost RISCVTTIImpl::getRegUsageForType(Type *Ty) {
+ TypeSize Size = Ty->getPrimitiveSizeInBits();
+ if (Ty->isVectorTy()) {
+ if (Size.isScalable() && ST->hasVInstructions())
+ return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
+
+ if (ST->useRVVForFixedLengthVectors())
+ return divideCeil(Size, ST->getMinRVVVectorSizeInBits());
+ }
+
+ return BaseT::getRegUsageForType(Ty);
+}
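A hedged worked example of getRegUsageForType for scalable types (standalone arithmetic; the example type is an assumption and RVVBitsPerBlock is taken to be 64):

// divideCeil(KnownMinBits, 64): e.g. <vscale x 4 x i64> has a known minimum
// size of 256 bits, so it uses 4 vector registers (an LMUL = 4 type).
unsigned scalableRegUsage(unsigned KnownMinBits) {
  return (KnownMinBits + 63) / 64;
}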
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 7353496f4684..e79c4f75712b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -58,20 +58,9 @@ public:
bool supportsScalableVectors() const { return ST->hasVInstructions(); }
Optional<unsigned> getMaxVScale() const;
- TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
- switch (K) {
- case TargetTransformInfo::RGK_Scalar:
- return TypeSize::getFixed(ST->getXLen());
- case TargetTransformInfo::RGK_FixedWidthVector:
- return TypeSize::getFixed(
- ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0);
- case TargetTransformInfo::RGK_ScalableVector:
- return TypeSize::getScalable(
- ST->hasVInstructions() ? RISCV::RVVBitsPerBlock : 0);
- }
+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
- llvm_unreachable("Unsupported register kind");
- }
+ InstructionCost getRegUsageForType(Type *Ty);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
@@ -81,7 +70,7 @@ public:
TTI::PeelingPreferences &PP);
unsigned getMinVectorRegisterBitWidth() const {
- return ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0;
+ return ST->useRVVForFixedLengthVectors() ? 16 : 0;
}
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
@@ -189,6 +178,20 @@ public:
// Let regular unroll to unroll the loop.
return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
}
+
+ // TODO: We should define RISC-V's own register classes.
+ // e.g. register class for FPR.
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
+ if (Vector) {
+ if (ST->hasVInstructions())
+ return 32;
+ return 0;
+ }
+ // 31 = 32 GPR - x0 (zero register)
+ // FIXME: Should we exclude fixed registers like SP, TP or GP?
+ return 31;
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 48e6903bd1b1..af3304f0907d 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -257,7 +257,7 @@ private:
};
public:
- SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ SparcOperand(KindTy K) : Kind(K) {}
bool isToken() const override { return Kind == k_Token; }
bool isReg() const override { return Kind == k_Register; }
diff --git a/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index afb69899e724..c5d0f1de7dfd 100644
--- a/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -66,7 +66,7 @@ private:
} // end anonymous namespace
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF);
+ Register GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg,
TLI->getPointerTy(CurDAG->getDataLayout()))
.getNode();
@@ -168,8 +168,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
// placement.
SDLoc dl(N);
- SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
- : SDValue(nullptr,0);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
SmallVector<bool, 8> OpChanged;
// Glue node will be appended late.
@@ -221,8 +220,8 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
SDValue V0 = N->getOperand(i+1);
SDValue V1 = N->getOperand(i+2);
- unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
- unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
SDValue PairedReg;
MachineRegisterInfo &MRI = MF->getRegInfo();
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index ed1faf6b1fe8..6d6879bc94b3 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -826,7 +826,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// sret only allowed on first argument
assert(Outs[realArgIdx].OrigArgIndex == 0);
PointerType *Ty = cast<PointerType>(CLI.getArgs()[0].Ty);
- Type *ElementTy = Ty->getElementType();
+ Type *ElementTy = Ty->getPointerElementType();
SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
continue;
}
@@ -2684,7 +2684,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
SDValue RetAddr;
if (depth == 0) {
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- unsigned RetReg = MF.addLiveIn(SP::I7, TLI.getRegClassFor(PtrVT));
+ Register RetReg = MF.addLiveIn(SP::I7, TLI.getRegClassFor(PtrVT));
RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
return RetAddr;
}
@@ -3245,7 +3245,7 @@ LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(nullptr, 0);
+ SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
diff --git a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
index 9bbe602b32b3..f30ddc7b4955 100644
--- a/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -18,9 +18,7 @@ class TargetMachine;
class SparcELFTargetObjectFile : public TargetLoweringObjectFileELF {
public:
- SparcELFTargetObjectFile() :
- TargetLoweringObjectFileELF()
- {}
+ SparcELFTargetObjectFile() {}
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 899fec6c3328..e76fa03af3bf 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -23,11 +23,7 @@ class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
-class StringRef;
class Target;
-class Triple;
-class raw_pwrite_stream;
-class raw_ostream;
namespace SystemZMC {
// How many bytes are in the ABI-defined, caller-allocated part of
diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h
index bedbd061ea5c..5be19f0e3b46 100644
--- a/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/llvm/lib/Target/SystemZ/SystemZ.h
@@ -20,6 +20,7 @@
namespace llvm {
class SystemZTargetMachine;
class FunctionPass;
+class PassRegistry;
namespace SystemZ {
// Condition-code mask values.
@@ -196,6 +197,15 @@ FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass();
+
+void initializeSystemZElimComparePass(PassRegistry &);
+void initializeSystemZShortenInstPass(PassRegistry &);
+void initializeSystemZLongBranchPass(PassRegistry &);
+void initializeSystemZLDCleanupPass(PassRegistry &);
+void initializeSystemZCopyPhysRegsPass(PassRegistry &);
+void initializeSystemZPostRewritePass(PassRegistry &);
+void initializeSystemZTDCPassPass(PassRegistry &);
+
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index defab665f924..e01adcce04ab 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -786,6 +786,50 @@ void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
emitStackMaps(SM);
}
+void SystemZAsmPrinter::emitFunctionEntryLabel() {
+ const SystemZSubtarget &Subtarget =
+ static_cast<const SystemZSubtarget &>(MF->getSubtarget());
+
+ if (Subtarget.getTargetTriple().isOSzOS()) {
+ MCContext &OutContext = OutStreamer->getContext();
+ MCSymbol *EPMarkerSym = OutContext.createTempSymbol("CM_", true);
+
+ // EntryPoint Marker
+ const MachineFrameInfo &MFFrame = MF->getFrameInfo();
+ bool IsUsingAlloca = MFFrame.hasVarSizedObjects();
+
+ // Set Flags
+ uint8_t Flags = 0;
+ if (IsUsingAlloca)
+ Flags |= 0x04;
+
+ uint32_t DSASize = MFFrame.getStackSize();
+
+    // Pack the top 27 bits of DSASize together with the bottom 5 bits of Flags.
+ uint32_t DSAAndFlags = DSASize & 0xFFFFFFE0; // (x/32) << 5
+ DSAAndFlags |= Flags;
+
+ // Emit entry point marker section.
+ OutStreamer->AddComment("XPLINK Routine Layout Entry");
+ OutStreamer->emitLabel(EPMarkerSym);
+ OutStreamer->AddComment("Eyecatcher 0x00C300C500C500");
+ OutStreamer->emitIntValueInHex(0x00C300C500C500, 7); // Eyecatcher.
+ OutStreamer->AddComment("Mark Type C'1'");
+ OutStreamer->emitInt8(0xF1); // Mark Type.
+ if (OutStreamer->isVerboseAsm()) {
+ OutStreamer->AddComment("DSA Size 0x" + Twine::utohexstr(DSASize));
+ OutStreamer->AddComment("Entry Flags");
+ if (Flags & 0x04)
+ OutStreamer->AddComment(" Bit 2: 1 = Uses alloca");
+ else
+ OutStreamer->AddComment(" Bit 2: 0 = Does not use alloca");
+ }
+ OutStreamer->emitInt32(DSAAndFlags);
+ }
+
+ AsmPrinter::emitFunctionEntryLabel();
+}
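A hedged worked example of the DSA/flags packing above (the DSA size below is an illustrative assumption):

#include <cstdint>

// The stack (DSA) size keeps its top 27 bits; the low 5 bits carry the flags.
uint32_t packDSAAndFlags(uint32_t DSASize, uint8_t Flags) {
  return (DSASize & 0xFFFFFFE0u) | Flags;
}
// e.g. packDSAAndFlags(0x140, 0x04) == 0x144 for a 320-byte DSA in a function
// that uses alloca (bit 2 set).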
+
// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZAsmPrinter() {
RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget());
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index 6cfd7bd4c486..80d68d1b93ff 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -19,7 +19,6 @@
namespace llvm {
class MCStreamer;
-class MachineBasicBlock;
class MachineInstr;
class Module;
class raw_ostream;
@@ -52,6 +51,7 @@ public:
SM.reset();
return AsmPrinter::doInitialization(M);
}
+ void emitFunctionEntryLabel() override;
private:
void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
index 7d21d29d270e..763aa8c0e41f 100644
--- a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
@@ -25,12 +25,6 @@
using namespace llvm;
-#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs"
-
-namespace llvm {
- void initializeSystemZCopyPhysRegsPass(PassRegistry&);
-}
-
namespace {
class SystemZCopyPhysRegs : public MachineFunctionPass {
@@ -41,8 +35,6 @@ public:
initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry());
}
- StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; }
-
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -59,7 +51,7 @@ char SystemZCopyPhysRegs::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs",
- SYSTEMZ_COPYPHYSREGS_NAME, false, false)
+ "SystemZ Copy Physregs", false, false)
FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) {
return new SystemZCopyPhysRegs();
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 631cbff303e8..4893acc81335 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -65,11 +65,8 @@ class SystemZElimCompare : public MachineFunctionPass {
public:
static char ID;
- SystemZElimCompare(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override {
- return "SystemZ Comparison Elimination";
+ SystemZElimCompare() : MachineFunctionPass(ID) {
+ initializeSystemZElimComparePass(*PassRegistry::getPassRegistry());
}
bool processBlock(MachineBasicBlock &MBB);
@@ -106,6 +103,9 @@ char SystemZElimCompare::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE,
+ "SystemZ Comparison Elimination", false, false)
+
// Returns true if MI is an instruction whose output equals the value in Reg.
static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
switch (MI.getOpcode()) {
@@ -746,5 +746,5 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
}
FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
- return new SystemZElimCompare(TM);
+ return new SystemZElimCompare();
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 99ab4c5455d6..ccc7d0737f53 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -103,7 +103,7 @@ bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots(
unsigned HighGPR = SystemZ::R15D;
int StartSPOffset = SystemZMC::ELFCallFrameSize;
for (auto &CS : CSI) {
- unsigned Reg = CS.getReg();
+ Register Reg = CS.getReg();
int Offset = getRegSpillOffset(MF, Reg);
if (Offset) {
if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) {
@@ -124,7 +124,7 @@ bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots(
// Also save the GPR varargs, if any. R6D is call-saved, so would
// already be included, but we also need to handle the call-clobbered
// argument registers.
- unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+ Register FirstGPR = ZFI->getVarArgsFirstGPR();
if (FirstGPR < SystemZ::ELFNumArgGPRs) {
unsigned Reg = SystemZ::ELFArgGPRs[FirstGPR];
int Offset = getRegSpillOffset(MF, Reg);
@@ -143,7 +143,7 @@ bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots(
for (auto &CS : CSI) {
if (CS.getFrameIdx() != INT32_MAX)
continue;
- unsigned Reg = CS.getReg();
+ Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
unsigned Size = TRI->getSpillSize(*RC);
CurrOffset -= Size;
@@ -271,7 +271,7 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
// Make sure all call-saved GPRs are included as operands and are
// marked as live on entry.
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (SystemZ::GR64BitRegClass.contains(Reg))
addSavedGPR(MBB, MIB, Reg, true);
}
@@ -284,7 +284,7 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
// Save FPRs/VRs in the normal TargetInstrInfo way.
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
@@ -314,7 +314,7 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
// Restore FPRs/VRs in the normal TargetInstrInfo way.
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg))
TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
@@ -346,7 +346,7 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
// Do a second scan adding regs as being defined by instruction
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR &&
SystemZ::GR64BitRegClass.contains(Reg))
MIB.addReg(Reg, RegState::ImplicitDefine);
@@ -500,7 +500,7 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
// Add CFI for the GPR saves.
for (auto &Save : CSI) {
- unsigned Reg = Save.getReg();
+ Register Reg = Save.getReg();
if (SystemZ::GR64BitRegClass.contains(Reg)) {
int FI = Save.getFrameIdx();
int64_t Offset = MFFrame.getObjectOffset(FI);
@@ -580,7 +580,7 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
// Skip over the FPR/VR saves.
SmallVector<unsigned, 8> CFIIndexes;
for (auto &Save : CSI) {
- unsigned Reg = Save.getReg();
+ Register Reg = Save.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg)) {
if (MBBI != MBB.end() &&
(MBBI->getOpcode() == SystemZ::STD ||
@@ -764,8 +764,7 @@ void SystemZELFFrameLowering::inlineStackProbe(
bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MF.getFrameInfo().hasVarSizedObjects() ||
- MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
+ MF.getFrameInfo().hasVarSizedObjects());
}
StackOffset SystemZELFFrameLowering::getFrameIndexReference(
@@ -850,7 +849,7 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
auto ProcessCSI = [&](std::vector<CalleeSavedInfo> &CSIList) {
for (auto &CS : CSIList) {
- unsigned Reg = CS.getReg();
+ Register Reg = CS.getReg();
int Offset = RegSpillOffsets[Reg];
if (Offset >= 0) {
if (GRRegClass.contains(Reg)) {
@@ -895,7 +894,7 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
for (auto &CS : CSI) {
if (CS.getFrameIdx() != INT32_MAX)
continue;
- unsigned Reg = CS.getReg();
+ Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
Align Alignment = TRI->getSpillAlign(*RC);
unsigned Size = TRI->getSpillSize(*RC);
@@ -966,7 +965,7 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
// marked as live on entry.
auto &GRRegClass = SystemZ::GR64BitRegClass;
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (GRRegClass.contains(Reg))
addSavedGPR(MBB, MIB, Reg, true);
}
@@ -974,7 +973,7 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
// Spill FPRs to the stack in the normal TargetInstrInfo way
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
@@ -1007,7 +1006,7 @@ bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
// Restore FPRs in the normal TargetInstrInfo way.
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ Register Reg = CSI[I].getReg();
if (SystemZ::FP64BitRegClass.contains(Reg))
TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
@@ -1041,7 +1040,7 @@ bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
// Do a second scan adding regs as being defined by instruction
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ Register Reg = CSI[I].getReg();
if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
MIB.addReg(Reg, RegState::ImplicitDefine);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 106b9e8ebe06..3a1af888d8f9 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -17,7 +17,6 @@
#include "llvm/Support/TypeSize.h"
namespace llvm {
-class SystemZTargetMachine;
class SystemZSubtarget;
class SystemZFrameLowering : public TargetFrameLowering {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 39a82e2c07e0..cf55318d328d 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -62,8 +62,7 @@ struct SystemZAddressingMode {
bool IncludesDynAlloc;
SystemZAddressingMode(AddrForm form, DispRange dr)
- : Form(form), DR(dr), Base(), Disp(0), Index(),
- IncludesDynAlloc(false) {}
+ : Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {}
// True if the address can have an index register.
bool hasIndexField() { return Form != FormBD; }
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 24de52850771..f10651d5c5d7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -318,8 +318,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
- // Use custom expanders so that we can force the function to use
- // a frame pointer.
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
@@ -1571,7 +1569,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
int FI =
MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- unsigned VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
+ Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
&SystemZ::FP64BitRegClass);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
@@ -3417,7 +3415,7 @@ SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
}
// Return R14D, which has the return address. Mark it an implicit live-in.
- unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
+ Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}
@@ -4194,7 +4192,6 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
auto *Regs = Subtarget->getSpecialRegisters();
- MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
report_fatal_error("Variable-sized stack allocations are not supported "
"in GHC calling convention");
@@ -4207,7 +4204,6 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
auto *Regs = Subtarget->getSpecialRegisters();
- MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
@@ -8318,13 +8314,11 @@ MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
// Add FPR/VR clobbers.
if (!NoFloat && (Control & 4) != 0) {
if (Subtarget.hasVector()) {
- for (int I = 0; I < 32; I++) {
- unsigned Reg = SystemZMC::VR128Regs[I];
+ for (unsigned Reg : SystemZMC::VR128Regs) {
MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
}
} else {
- for (int I = 0; I < 16; I++) {
- unsigned Reg = SystemZMC::FP64Regs[I];
+ for (unsigned Reg : SystemZMC::FP64Regs) {
MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 940c0a857ea4..a8ddb8c62d18 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -381,7 +381,6 @@ enum {
} // end namespace SystemZICMP
class SystemZSubtarget;
-class SystemZTargetMachine;
class SystemZTargetLowering : public TargetLowering {
public:
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index e80496e37781..6db9bf3056b7 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1309,7 +1309,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// allocated regs are in an FP reg-class per previous check above.
for (const MachineOperand &MO : MIB->operands())
if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MRI.getRegClass(Reg) == &SystemZ::VR32BitRegClass)
MRI.setRegClass(Reg, &SystemZ::FP32BitRegClass);
else if (MRI.getRegClass(Reg) == &SystemZ::VR64BitRegClass)
diff --git a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
index 06d893d043e9..d6c795985448 100644
--- a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -29,11 +29,8 @@ namespace {
class SystemZLDCleanup : public MachineFunctionPass {
public:
static char ID;
- SystemZLDCleanup(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
-
- StringRef getPassName() const override {
- return "SystemZ Local Dynamic TLS Access Clean-up";
+ SystemZLDCleanup() : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {
+ initializeSystemZLDCleanupPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -52,8 +49,11 @@ char SystemZLDCleanup::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(SystemZLDCleanup, "systemz-ld-cleanup",
+ "SystemZ Local Dynamic TLS Access Clean-up", false, false)
+
FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
- return new SystemZLDCleanup(TM);
+ return new SystemZLDCleanup();
}
void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 9c985c16f082..d53693154d40 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -135,10 +135,9 @@ class SystemZLongBranch : public MachineFunctionPass {
public:
static char ID;
- SystemZLongBranch(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return "SystemZ Long Branch"; }
+ SystemZLongBranch() : MachineFunctionPass(ID) {
+ initializeSystemZLongBranchPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
@@ -174,6 +173,9 @@ const uint64_t MaxForwardRange = 0xfffe;
} // end anonymous namespace
+INITIALIZE_PASS(SystemZLongBranch, DEBUG_TYPE, "SystemZ Long Branch", false,
+ false)
+
// Position describes the state immediately before Block. Update Block
// accordingly and move Position to the end of the block's non-terminator
// instructions.
@@ -481,5 +483,5 @@ bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
}
FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
- return new SystemZLongBranch(TM);
+ return new SystemZLongBranch();
}
diff --git a/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
index 14ad06488312..eb09033d1850 100644
--- a/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -18,7 +18,6 @@ class MCInst;
class MCOperand;
class MachineInstr;
class MachineOperand;
-class Mangler;
class SystemZAsmPrinter;
class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower {
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index f755d5cd3d5b..ec4b812eb0e1 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -34,14 +34,12 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
unsigned VarArgsFrameIndex;
unsigned RegSaveFrameIndex;
int FramePointerSaveIndex;
- bool ManipulatesSP;
unsigned NumLocalDynamics;
public:
explicit SystemZMachineFunctionInfo(MachineFunction &MF)
: VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
- RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false),
- NumLocalDynamics(0) {}
+ RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {}
// Get and set the first and last call-saved GPR that should be saved by
// this function and the SP offset for the STMG. These are 0 if no GPRs
@@ -85,11 +83,6 @@ public:
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
- // Get and set whether the function directly manipulates the stack pointer,
- // e.g. through STACKSAVE or STACKRESTORE.
- bool getManipulatesSP() const { return ManipulatesSP; }
- void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
-
// Count number of local-dynamic TLS symbols used.
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
index aaa7f8fc88f5..5a2cfc53da49 100644
--- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -21,16 +21,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
-#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass"
-
#define DEBUG_TYPE "systemz-postrewrite"
STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
-namespace llvm {
- void initializeSystemZPostRewritePass(PassRegistry&);
-}
-
namespace {
class SystemZPostRewrite : public MachineFunctionPass {
@@ -44,8 +38,6 @@ public:
bool runOnMachineFunction(MachineFunction &Fn) override;
- StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }
-
private:
void selectLOCRMux(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -70,7 +62,7 @@ char SystemZPostRewrite::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite",
- SYSTEMZ_POSTREWRITE_NAME, false, false)
+ "SystemZ Post Rewrite pass", false, false)
/// Returns an instance of the Post Rewrite pass.
FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
@@ -178,15 +170,15 @@ bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB,
MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
RestMBB->transferSuccessors(&MBB);
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- RestMBB->addLiveIn(*I);
+ for (MCPhysReg R : LiveRegs)
+ RestMBB->addLiveIn(R);
// Create a new block MoveMBB to hold the move instruction.
MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
MoveMBB->addLiveIn(SrcReg);
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- MoveMBB->addLiveIn(*I);
+ for (MCPhysReg R : LiveRegs)
+ MoveMBB->addLiveIn(R);
// At the end of MBB, create a conditional branch to RestMBB if the
// condition is false, otherwise fall through to MoveMBB.
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index a4a5b1fbdf90..da6725777e43 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -17,8 +17,6 @@
namespace llvm {
-class SystemZTargetMachine;
-
class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo {
public:
explicit SystemZSelectionDAGInfo() = default;
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 254e5e92449b..92930dad80ef 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -26,11 +26,7 @@ namespace {
class SystemZShortenInst : public MachineFunctionPass {
public:
static char ID;
- SystemZShortenInst(const SystemZTargetMachine &tm);
-
- StringRef getPassName() const override {
- return "SystemZ Instruction Shortening";
- }
+ SystemZShortenInst();
bool processBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override;
@@ -56,12 +52,17 @@ private:
char SystemZShortenInst::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(SystemZShortenInst, DEBUG_TYPE,
+ "SystemZ Instruction Shortening", false, false)
+
FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
- return new SystemZShortenInst(TM);
+ return new SystemZShortenInst();
}
-SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr) {}
+SystemZShortenInst::SystemZShortenInst()
+ : MachineFunctionPass(ID), TII(nullptr) {
+ initializeSystemZShortenInstPass(*PassRegistry::getPassRegistry());
+}
// Tie operands if MI has become a two-address instruction.
static void tieOpsIfNeeded(MachineInstr &MI) {
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 0f03d96655bf..75c0d454d904 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -89,7 +89,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasSoftFloat(false), TargetTriple(TT),
SpecialRegisters(initializeSpecialRegisters()),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(), FrameLowering(SystemZFrameLowering::create(*this)) {}
+ FrameLowering(SystemZFrameLowering::create(*this)) {}
bool SystemZSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 67c5b8eb09b6..98f7094fcb48 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -85,7 +85,7 @@ private:
SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
- SystemZCallingConventionRegisters *initializeSpecialRegisters(void);
+ SystemZCallingConventionRegisters *initializeSpecialRegisters();
public:
SystemZSubtarget(const Triple &TT, const std::string &CPU,
diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
index 7cb7dca2ea28..f62afb8ddfcf 100644
--- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -61,10 +61,6 @@
using namespace llvm;
-namespace llvm {
- void initializeSystemZTDCPassPass(PassRegistry&);
-}
-
namespace {
class SystemZTDCPass : public FunctionPass {
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index deb3358102ed..f1469fe8f56b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -32,6 +32,13 @@ using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
// Register the target.
RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget());
+ auto &PR = *PassRegistry::getPassRegistry();
+ initializeSystemZElimComparePass(PR);
+ initializeSystemZShortenInstPass(PR);
+ initializeSystemZLongBranchPass(PR);
+ initializeSystemZLDCleanupPass(PR);
+ initializeSystemZPostRewritePass(PR);
+ initializeSystemZTDCPassPass(PR);
}
// Determine whether we use the vector ABI.
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index fd9dc32b04f5..4a318e493c52 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -210,7 +210,7 @@ private:
};
public:
- VEOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ VEOperand(KindTy K) : Kind(K) {}
bool isToken() const override { return Kind == k_Token; }
bool isReg() const override { return Kind == k_Register; }
diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp
index c4588926af9e..4db6a59284c2 100644
--- a/llvm/lib/Target/VE/LVLGen.cpp
+++ b/llvm/lib/Target/VE/LVLGen.cpp
@@ -125,8 +125,8 @@ bool LVLGen::runOnMachineFunction(MachineFunction &F) {
TII = Subtarget.getInstrInfo();
TRI = Subtarget.getRegisterInfo();
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- Changed |= runOnMachineBasicBlock(*FI);
+ for (MachineBasicBlock &MBB : F)
+ Changed |= runOnMachineBasicBlock(MBB);
if (Changed) {
LLVM_DEBUG(dbgs() << "\n");
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
index 7fb8a556aa74..f0bb6e3acdee 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h
@@ -27,10 +27,6 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
class Target;
-class Triple;
-class StringRef;
-class raw_pwrite_stream;
-class raw_ostream;
MCCodeEmitter *createVEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI, MCContext &Ctx);
diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index 8c1fa840f19c..2a729a1a311c 100644
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -22,7 +22,6 @@
namespace llvm {
class FunctionPass;
class VETargetMachine;
-class formatted_raw_ostream;
class AsmPrinter;
class MCInst;
class MachineInstr;
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
new file mode 100644
index 000000000000..af3e4af13814
--- /dev/null
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -0,0 +1,81 @@
+//===-- VECustomDAG.cpp - VE Custom DAG Nodes ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that VE uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VECustomDAG.h"
+
+#ifndef DEBUG_TYPE
+#define DEBUG_TYPE "vecustomdag"
+#endif
+
+namespace llvm {
+
+static const int StandardVectorWidth = 256;
+
+bool isPackedVectorType(EVT SomeVT) {
+ if (!SomeVT.isVector())
+ return false;
+ return SomeVT.getVectorNumElements() > StandardVectorWidth;
+}
+
+/// \returns the VVP_* SDNode opcode corresponding to \p Opcode.
+Optional<unsigned> getVVPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
+ case ISD::VPOPC: \
+ return VEISD::VVPNAME;
+#define ADD_VVP_OP(VVPNAME, SDNAME) \
+ case VEISD::VVPNAME: \
+ case ISD::SDNAME: \
+ return VEISD::VVPNAME;
+#include "VVPNodes.def"
+ }
+ return None;
+}
+
+bool isVVPBinaryOp(unsigned VVPOpcode) {
+ switch (VVPOpcode) {
+#define ADD_BINARY_VVP_OP(VVPNAME, ...) \
+ case VEISD::VVPNAME: \
+ return true;
+#include "VVPNodes.def"
+ }
+ return false;
+}
+
+SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget,
+ bool IsOpaque) const {
+ return DAG.getConstant(Val, DL, VT, IsTarget, IsOpaque);
+}
+
+SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
+ SDValue AVL) const {
+ assert(ResultVT.isVector());
+ auto ScaVT = Scalar.getValueType();
+ assert(ScaVT != MVT::i1 && "TODO: Mask broadcasts");
+
+ if (isPackedVectorType(ResultVT)) {
+ // v512x packed mode broadcast
+ // Replicate the scalar reg (f32 or i32) onto the opposing half of the full
+ // scalar register. If it's an I64 type, assume that this has already
+ // happened.
+ if (ScaVT == MVT::f32) {
+ Scalar = getNode(VEISD::REPL_F32, MVT::i64, Scalar);
+ } else if (ScaVT == MVT::i32) {
+ Scalar = getNode(VEISD::REPL_I32, MVT::i64, Scalar);
+ }
+ }
+
+ return getNode(VEISD::VEC_BROADCAST, ResultVT, {Scalar, AVL});
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
new file mode 100644
index 000000000000..ddd6ce783366
--- /dev/null
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -0,0 +1,79 @@
+//===------------ VECustomDAG.h - VE Custom DAG Nodes -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the helper functions that VE uses to lower LLVM code into a
+// selection DAG. For example, hiding SDLoc, and easy to use SDNodeFlags.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VECUSTOMDAG_H
+#define LLVM_LIB_TARGET_VE_VECUSTOMDAG_H
+
+#include "VE.h"
+#include "VEISelLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+
+Optional<unsigned> getVVPOpcode(unsigned Opcode);
+
+bool isVVPBinaryOp(unsigned Opcode);
+
+bool isPackedVectorType(EVT SomeVT);
+
+class VECustomDAG {
+ SelectionDAG &DAG;
+ SDLoc DL;
+
+public:
+ SelectionDAG *getDAG() const { return &DAG; }
+
+ VECustomDAG(SelectionDAG &DAG, SDLoc DL) : DAG(DAG), DL(DL) {}
+
+ VECustomDAG(SelectionDAG &DAG, SDValue WhereOp) : DAG(DAG), DL(WhereOp) {}
+
+ VECustomDAG(SelectionDAG &DAG, const SDNode *WhereN) : DAG(DAG), DL(WhereN) {}
+
+ /// getNode {
+ SDValue getNode(unsigned OC, SDVTList VTL, ArrayRef<SDValue> OpV,
+ Optional<SDNodeFlags> Flags = None) const {
+ auto N = DAG.getNode(OC, DL, VTL, OpV);
+ if (Flags)
+ N->setFlags(*Flags);
+ return N;
+ }
+
+ SDValue getNode(unsigned OC, ArrayRef<EVT> ResVT, ArrayRef<SDValue> OpV,
+ Optional<SDNodeFlags> Flags = None) const {
+ auto N = DAG.getNode(OC, DL, ResVT, OpV);
+ if (Flags)
+ N->setFlags(*Flags);
+ return N;
+ }
+
+ SDValue getNode(unsigned OC, EVT ResVT, ArrayRef<SDValue> OpV,
+ Optional<SDNodeFlags> Flags = None) const {
+ auto N = DAG.getNode(OC, DL, ResVT, OpV);
+ if (Flags)
+ N->setFlags(*Flags);
+ return N;
+ }
+
+ SDValue getUNDEF(EVT VT) const { return DAG.getUNDEF(VT); }
+ /// } getNode
+
+ SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
+ bool IsOpaque = false) const;
+
+ SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_VE_VECUSTOMDAG_H
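For orientation, here is a minimal sketch of how a lowering routine can drive the VECustomDAG wrapper declared above. The helper name is hypothetical and the cap of 256 mirrors what lowerBUILD_VECTOR does in VEISelLowering.cpp below, so treat it as an illustration rather than part of the patch.

// Sketch only: 'lowerSplatSketch' is a hypothetical helper, not an LLVM API.
static SDValue lowerSplatSketch(SDValue Op, SelectionDAG &DAG) {
  VECustomDAG CDAG(DAG, Op);                // SDLoc is captured from Op once.
  EVT ResultVT = Op.getValueType();
  // Cap the AVL at the standard vector length of 256 for now.
  unsigned Len = std::min<unsigned>(256, ResultVT.getVectorNumElements());
  SDValue AVL = CDAG.getConstant(Len, MVT::i32);
  // getBroadcast inserts REPL_F32/REPL_I32 for packed (v512*) result types.
  return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL);
}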
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 5ef223d6030b..9137c476777e 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -13,6 +13,7 @@
#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
+#include "VECustomDAG.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
@@ -419,7 +420,7 @@ SDValue VETargetLowering::LowerFormalArguments(
// All integer register arguments are promoted by the caller to i64.
// Create a virtual register for the promoted live-in value.
- unsigned VReg =
+ Register VReg =
MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
@@ -754,7 +755,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(!VA.needsCustom() && "Unexpected custom lowering");
- unsigned Reg = VA.getLocReg();
+ Register Reg = VA.getLocReg();
// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
// reside in the same register in the high and low bits. Reuse the
@@ -898,6 +899,8 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(TS1AM)
TARGET_NODE_CASE(VEC_BROADCAST)
+ TARGET_NODE_CASE(REPL_I32)
+ TARGET_NODE_CASE(REPL_F32)
// Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
@@ -1545,7 +1548,7 @@ static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
unsigned Depth = Op.getConstantOperandVal(0);
const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- unsigned FrameReg = RegInfo->getFrameRegister(MF);
+ Register FrameReg = RegInfo->getFrameRegister(MF);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
while (Depth--)
@@ -1640,28 +1643,26 @@ static SDValue getSplatValue(SDNode *N) {
SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc DL(Op);
- unsigned NumEls = Op.getValueType().getVectorNumElements();
- MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
+ VECustomDAG CDAG(DAG, Op);
+ MVT ResultVT = Op.getSimpleValueType();
// If there is just one element, expand to INSERT_VECTOR_ELT.
unsigned UniqueIdx;
if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
- SDValue AccuV = DAG.getUNDEF(Op.getValueType());
+ SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
auto ElemV = Op->getOperand(UniqueIdx);
- SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
- ElemV, IdxV);
+ SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
+ return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
}
// Else emit a broadcast.
if (SDValue ScalarV = getSplatValue(Op.getNode())) {
- // lower to VEC_BROADCAST
- MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
-
- auto AVL = DAG.getConstant(NumEls, DL, MVT::i32);
- return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0),
- AVL);
+ unsigned NumEls = ResultVT.getVectorNumElements();
+ // TODO: Legalize packed-mode AVL.
+ // For now, cap the AVL at 256.
+ auto CappedLength = std::min<unsigned>(256, NumEls);
+ auto AVL = CDAG.getConstant(CappedLength, MVT::i32);
+ return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL);
}
// Expand
@@ -1720,7 +1721,7 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
-#define ADD_BINARY_VVP_OP(VVP_NAME, VP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
return lowerToVVP(Op, DAG);
}
@@ -2666,21 +2667,6 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
return true;
}
-/// \returns the VVP_* SDNode opcode corresponsing to \p OC.
-static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
- switch (Opcode) {
-#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
- case ISD::VPOPC: \
- return VEISD::VVPNAME;
-#define ADD_VVP_OP(VVPNAME, SDNAME) \
- case VEISD::VVPNAME: \
- case ISD::SDNAME: \
- return VEISD::VVPNAME;
-#include "VVPNodes.def"
- }
- return None;
-}
-
SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
// Can we represent this as a VVP node.
const unsigned Opcode = Op->getOpcode();
@@ -2691,7 +2677,7 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
const bool FromVP = ISD::isVPOpcode(Opcode);
// The representative and legalized vector type of this operation.
- SDLoc DL(Op);
+ VECustomDAG CDAG(DAG, Op);
MVT MaskVT = MVT::v256i1; // TODO: packed mode.
EVT OpVecVT = Op.getValueType();
EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
@@ -2708,27 +2694,21 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
} else {
// Materialize the VL parameter.
- AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
- SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
- Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
- ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+ AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32);
+ SDValue ConstTrue = CDAG.getConstant(1, MVT::i32);
+ Mask = CDAG.getBroadcast(MaskVT, ConstTrue, AVL);
}
- // Categories we are interested in.
- bool IsBinaryOp = false;
-
- switch (VVPOpcode) {
-#define ADD_BINARY_VVP_OP(VVPNAME, ...) \
- case VEISD::VVPNAME: \
- IsBinaryOp = true; \
- break;
-#include "VVPNodes.def"
- }
-
- if (IsBinaryOp) {
+ if (isVVPBinaryOp(VVPOpcode)) {
assert(LegalVecVT.isSimple());
- return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
- Op->getOperand(1), Mask, AVL);
+ return CDAG.getNode(VVPOpcode, LegalVecVT,
+ {Op->getOperand(0), Op->getOperand(1), Mask, AVL});
+ }
+ if (VVPOpcode == VEISD::VVP_SELECT) {
+ auto Mask = Op->getOperand(0);
+ auto OnTrue = Op->getOperand(1);
+ auto OnFalse = Op->getOperand(2);
+ return CDAG.getNode(VVPOpcode, LegalVecVT, {OnTrue, OnFalse, Mask, AVL});
}
llvm_unreachable("lowerToVVP called for unexpected SDNode.");
}
@@ -2750,7 +2730,7 @@ SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue Idx = Op.getOperand(1);
SDLoc DL(Op);
SDValue Result = Op;
- if (0 /* Idx->isConstant() */) {
+ if (false /* Idx->isConstant() */) {
// TODO: optimized implementation using constant values
} else {
SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
@@ -2808,7 +2788,7 @@ SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
SDValue Result = Op;
- if (0 /* Idx->isConstant()*/) {
+ if (false /* Idx->isConstant()*/) {
// TODO: optimized implementation using constant values
} else {
SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index b4ce8906fd51..09bd19e83717 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -40,6 +40,8 @@ enum NodeType : unsigned {
TS1AM, // A TS1AM instruction used for 1/2 bytes swap.
VEC_BROADCAST, // A vector broadcast instruction.
// 0: scalar value, 1: VL
+ REPL_I32, // Replicate an i32 subregister onto the other half of a 64-bit register.
+ REPL_F32, // Replicate an f32 subregister onto the other half of a 64-bit register.
// VVP_* nodes.
#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
@@ -219,4 +221,4 @@ public:
};
} // namespace llvm
-#endif // VE_ISELLOWERING_H
+#endif // LLVM_LIB_TARGET_VE_VEISELLOWERING_H
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 46846edfeafb..7c1bd5201867 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -248,7 +248,7 @@ unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI = &getRegisterInfo();
MachineFunction *MF = MBB.getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Reg = Cond[2].getReg();
+ Register Reg = Cond[2].getReg();
if (IsIntegerCC(Cond[0].getImm())) {
if (TRI->getRegSizeInBits(Reg, MRI) == 32) {
opc[0] = VE::BRCFWir;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index c3abbe2cafab..717427c3f48d 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1576,6 +1576,12 @@ def f2l : OutPatFrag<(ops node:$exp),
def l2f : OutPatFrag<(ops node:$exp),
(EXTRACT_SUBREG $exp, sub_f32)>;
+// Zero out subregisters.
+def zero_i32 : OutPatFrag<(ops node:$expr),
+ (ANDrm $expr, 32)>;
+def zero_f32 : OutPatFrag<(ops node:$expr),
+ (ANDrm $expr, !add(32, 64))>;
+
// Small immediates.
def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>;
def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
@@ -2287,6 +2293,16 @@ class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
[SDTCisVec<0>, IsVLVT<2>]>>;
+// Replicate the lower 32 bits to the upper 32 bits (f32 scalar replication).
+def repl_f32 : SDNode<"VEISD::REPL_F32",
+ SDTypeProfile<1, 1,
+ [SDTCisInt<0>, SDTCisFP<1>]>>;
+// Replicate the upper 32 bits to the lower 32 bits (i32 scalar replication).
+def repl_i32 : SDNode<"VEISD::REPL_I32",
+ SDTypeProfile<1, 1,
+ [SDTCisInt<0>, SDTCisInt<1>]>>;
+
+
// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
def true_mask : PatLeaf<
(vec_broadcast (i32 nonzero), (i32 srcvalue))>;
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index dc3c913c918a..6c5b80315efb 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -15,6 +15,17 @@
// Instruction format superclass
//===----------------------------------------------------------------------===//
+// Sub-register replication for packed broadcast.
+def: Pat<(i64 (repl_f32 f32:$val)),
+ (ORrr
+ (SRLri (f2l $val), 32),
+ (zero_i32 (f2l $val)))>;
+def: Pat<(i64 (repl_i32 i32:$val)),
+ (ORrr
+ (zero_f32 (i2l $val)),
+ (SLLri (i2l $val), 32))>;
+
+
multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
SDNodeXForm ImmCast, OutPatFrag SuperRegCast> {
// VBRDil
@@ -89,3 +100,8 @@ defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
defm : patterns_elem64<v256i64, i64, simm7, LO7>;
defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;
+
+defm : vbrd_elem64<v512i32, i64, simm7, LO7>;
+defm : vbrd_elem64<v512f32, i64, simm7, LO7>;
+defm : vbrd_elem64<v512i32, f64, simm7fp, LO7FP>;
+defm : vbrd_elem64<v512f32, f64, simm7fp, LO7FP>;
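For intuition, the repl_f32/repl_i32 patterns above OR a shifted copy of one 32-bit half of the 64-bit scalar register with the other, zeroed half, so the same payload ends up in both halves; the vbrd_elem64 instances added for v512i32/v512f32 then broadcast that combined 64-bit value into every packed element. A scalar sketch of the intended effect, using a hypothetical helper name and making no claim about which half holds i32 versus f32 on VE:

// Sketch only (not part of the patch): both 32-bit halves of the result
// carry the same payload, which is what the packed broadcast needs.
#include <cstdint>

static uint64_t replicateHalvesSketch(uint32_t Payload) {
  return (static_cast<uint64_t>(Payload) << 32) | Payload;
}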
diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp
index bc5577ce4f97..57195f238cf6 100644
--- a/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -78,8 +78,7 @@ void llvm::LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MI, MO, AP);
if (MCOp.isValid())
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
index 16b25fed3f11..3160f6a552d7 100644
--- a/llvm/lib/Target/VE/VEMachineFunctionInfo.h
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
@@ -29,10 +29,9 @@ private:
bool IsLeafProc;
public:
- VEMachineFunctionInfo()
- : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {}
+ VEMachineFunctionInfo() : VarArgsFrameOffset(0), IsLeafProc(false) {}
explicit VEMachineFunctionInfo(MachineFunction &MF)
- : GlobalBaseReg(), VarArgsFrameOffset(0), IsLeafProc(false) {}
+ : VarArgsFrameOffset(0), IsLeafProc(false) {}
Register getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h
index 213aca2ea3f9..0c3dc0a08072 100644
--- a/llvm/lib/Target/VE/VESubtarget.h
+++ b/llvm/lib/Target/VE/VESubtarget.h
@@ -76,7 +76,7 @@ public:
/// Get the size of RSA, return address, and frame pointer as described
/// in VEFrameLowering.cpp.
- unsigned getRsaSize(void) const { return 176; };
+ unsigned getRsaSize() const { return 176; }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
};
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
index 99566e91ec11..ef9c238066c0 100644
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -39,6 +39,15 @@ def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
IsVLVT<4>
]>;
+// Select(OnTrue, OnFalse, SelMask, vl)
+def SDTSelectVVP : SDTypeProfile<1, 4, [ // vp_select, vp_merge
+ SDTCisVec<0>,
+ SDTCisSameNumEltsAs<0, 3>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ IsVLVT<4>
+]>;
+
// Binary operator commutative pattern.
class vvp_commutative<SDNode RootOp> :
PatFrags<
@@ -79,3 +88,5 @@ def c_vvp_fmul : vvp_commutative<vvp_fmul>;
def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
// } Binary Operators
+
+def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index 8d5d9d103547..74720fd1f419 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -20,8 +20,22 @@ include "VVPInstrInfo.td"
multiclass Binary_rv<SDPatternOperator OpNode,
ValueType ScalarVT, ValueType DataVT,
ValueType MaskVT, string OpBaseName> {
- // Masked with select, broadcast.
- // TODO
+ // Masked with passthru, broadcast.
+ def : Pat<(vvp_select
+ (OpNode
+ (any_broadcast ScalarVT:$sx),
+ DataVT:$vy,
+ (MaskVT srcvalue),
+ (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$pivot),
+ (!cast<Instruction>(OpBaseName#"rvml_v")
+ ScalarVT:$sx,
+ $vy,
+ $mask,
+ $pivot,
+ $vfalse)>;
// Unmasked, broadcast.
def : Pat<(OpNode
@@ -42,8 +56,22 @@ multiclass Binary_rv<SDPatternOperator OpNode,
multiclass Binary_vr<SDPatternOperator OpNode,
ValueType ScalarVT, ValueType DataVT,
ValueType MaskVT, string OpBaseName> {
- // Masked with select, broadcast.
- // TODO
+ // Masked with passthru, broadcast.
+ def : Pat<(vvp_select
+ (OpNode
+ DataVT:$vx,
+ (any_broadcast ScalarVT:$sy),
+ (MaskVT srcvalue),
+ (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$pivot),
+ (!cast<Instruction>(OpBaseName#"vrml_v")
+ $vx,
+ ScalarVT:$sy,
+ $mask,
+ $pivot,
+ $vfalse)>;
// Unmasked, broadcast.
def : Pat<(OpNode
@@ -64,6 +92,23 @@ multiclass Binary_vr<SDPatternOperator OpNode,
multiclass Binary_vv<SDPatternOperator OpNode,
ValueType DataVT,
ValueType MaskVT, string OpBaseName> {
+ // Masked with passthru, broadcast.
+ def : Pat<(vvp_select
+ (OpNode
+ DataVT:$vx,
+ DataVT:$vy,
+ (MaskVT srcvalue),
+ (i32 srcvalue)),
+ DataVT:$vfalse,
+ MaskVT:$mask,
+ i32:$pivot),
+ (!cast<Instruction>(OpBaseName#"vvml_v")
+ $vx,
+ $vy,
+ $mask,
+ $pivot,
+ $vfalse)>;
+
// Masked with select.
// TODO
@@ -191,3 +236,35 @@ defm : Binary_rv_vv_ShortLong<vvp_fsub,
defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
f64, v256f64, "VFDIVD",
f32, v256f32, "VFDIVS">;
+
+multiclass Merge_mvv<
+ SDPatternOperator OpNode,
+ ValueType DataVT, ValueType MaskVT,
+ string OpBaseName> {
+ // Masked.
+ def : Pat<(OpNode
+ DataVT:$vtrue, DataVT:$vfalse,
+ MaskVT:$vm,
+ i32:$avl),
+ (!cast<Instruction>(OpBaseName#"vvml_v")
+ $vfalse, $vtrue, $vm, $avl, $vfalse)>;
+}
+
+multiclass Merge_mvv_ShortLong<
+ SDPatternOperator OpNode,
+ ValueType LongDataVT, ValueType ShortDataVT,
+ string OpBaseName> {
+ defm : Merge_mvv<OpNode,
+ LongDataVT, v256i1,
+ OpBaseName>;
+ defm : Merge_mvv<OpNode,
+ ShortDataVT, v256i1,
+ OpBaseName>;
+}
+
+defm : Merge_mvv_ShortLong<vvp_select,
+ v256f64,
+ v256f32, "VMRG">;
+defm : Merge_mvv_ShortLong<vvp_select,
+ v256i64,
+ v256i32, "VMRG">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
index 8a9231f7d3e6..8000f84c5dbe 100644
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -59,6 +59,11 @@ ADD_BINARY_VVP_OP_COMPACT(FSUB)
ADD_BINARY_VVP_OP_COMPACT(FMUL)
ADD_BINARY_VVP_OP_COMPACT(FDIV)
+// Shuffles.
+ADD_VVP_OP(VVP_SELECT, VSELECT)
+HANDLE_VP_TO_VVP(VP_SELECT, VVP_SELECT)
+HANDLE_VP_TO_VVP(VP_MERGE, VVP_SELECT)
+
#undef ADD_BINARY_VVP_OP
#undef ADD_BINARY_VVP_OP_COMPACT
#undef ADD_VVP_OP
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index b2f10ca93a4f..75d5d0675990 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -26,7 +26,6 @@ class MCAsmBackend;
class MCCodeEmitter;
class MCInstrInfo;
class MCObjectTargetWriter;
-class MVT;
class Triple;
MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index d024185defb4..57e40f6cd8d7 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -25,7 +25,6 @@ class MachineInstr;
class MachineOperand;
class MCContext;
class MCSymbolWasm;
-class StringRef;
class WebAssemblyFunctionInfo;
class WebAssemblySubtarget;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 910a4e5e0d1a..eeec0fc671cc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -406,7 +406,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// TODO: Sort the locals for better compression.
MFI.setNumLocals(CurLocal - MFI.getParams().size());
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
auto RL = Reg2Local.find(Reg);
if (RL == Reg2Local.end() || RL->second < MFI.getParams().size())
continue;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 642aa6b4028a..406edef8ff3f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -286,7 +286,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
}
if (S == 1 && Addr.isRegBase() && Addr.getReg() == 0) {
// An unscaled add of a register. Set it as the new base.
- unsigned Reg = getRegForValue(Op);
+ Register Reg = getRegForValue(Op);
if (Reg == 0)
return false;
Addr.setReg(Reg);
@@ -372,7 +372,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
if (Addr.isSet()) {
return false;
}
- unsigned Reg = getRegForValue(Obj);
+ Register Reg = getRegForValue(Obj);
if (Reg == 0)
return false;
Addr.setReg(Reg);
@@ -430,7 +430,7 @@ unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V,
}
Not = false;
- unsigned Reg = getRegForValue(V);
+ Register Reg = getRegForValue(V);
if (Reg == 0)
return 0;
return maskI1Value(Reg, V);
@@ -458,12 +458,12 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
return 0;
}
- unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
+ Register Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
.addImm(~(~uint64_t(0) << MVT(From).getSizeInBits()));
- unsigned Result = createResultReg(&WebAssembly::I32RegClass);
+ Register Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::AND_I32), Result)
.addReg(Reg)
@@ -488,18 +488,18 @@ unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V,
return 0;
}
- unsigned Imm = createResultReg(&WebAssembly::I32RegClass);
+ Register Imm = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::CONST_I32), Imm)
.addImm(32 - MVT(From).getSizeInBits());
- unsigned Left = createResultReg(&WebAssembly::I32RegClass);
+ Register Left = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHL_I32), Left)
.addReg(Reg)
.addReg(Imm);
- unsigned Right = createResultReg(&WebAssembly::I32RegClass);
+ Register Right = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::SHR_S_I32), Right)
.addReg(Left)
@@ -517,7 +517,7 @@ unsigned WebAssemblyFastISel::zeroExtend(unsigned Reg, const Value *V,
Reg = zeroExtendToI32(Reg, V, From);
- unsigned Result = createResultReg(&WebAssembly::I64RegClass);
+ Register Result = createResultReg(&WebAssembly::I64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I64_EXTEND_U_I32), Result)
.addReg(Reg);
@@ -539,7 +539,7 @@ unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
Reg = signExtendToI32(Reg, V, From);
- unsigned Result = createResultReg(&WebAssembly::I64RegClass);
+ Register Result = createResultReg(&WebAssembly::I64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I64_EXTEND_S_I32), Result)
.addReg(Reg);
@@ -555,7 +555,7 @@ unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) {
MVT::SimpleValueType From = getSimpleType(V->getType());
MVT::SimpleValueType To = getLegalType(From);
- unsigned VReg = getRegForValue(V);
+ Register VReg = getRegForValue(V);
if (VReg == 0)
return 0;
return zeroExtend(VReg, V, From, To);
@@ -564,7 +564,7 @@ unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) {
unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) {
MVT::SimpleValueType From = getSimpleType(V->getType());
MVT::SimpleValueType To = getLegalType(From);
- unsigned VReg = getRegForValue(V);
+ Register VReg = getRegForValue(V);
if (VReg == 0)
return 0;
return signExtend(VReg, V, From, To);
@@ -578,7 +578,7 @@ unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V,
unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
assert(MRI.getRegClass(Reg) == &WebAssembly::I32RegClass);
- unsigned NotReg = createResultReg(&WebAssembly::I32RegClass);
+ Register NotReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::EQZ_I32), NotReg)
.addReg(Reg);
@@ -586,7 +586,7 @@ unsigned WebAssemblyFastISel::notValue(unsigned Reg) {
}
unsigned WebAssemblyFastISel::copyValue(unsigned Reg) {
- unsigned ResultReg = createResultReg(MRI.getRegClass(Reg));
+ Register ResultReg = createResultReg(MRI.getRegClass(Reg));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(WebAssembly::COPY),
ResultReg)
.addReg(Reg);
@@ -598,7 +598,7 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg =
+ Register ResultReg =
createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc =
@@ -617,7 +617,7 @@ unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
return 0;
if (GV->isThreadLocal())
return 0;
- unsigned ResultReg =
+ Register ResultReg =
createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
unsigned Opc = Subtarget->hasAddr64() ? WebAssembly::CONST_I64
@@ -715,7 +715,7 @@ bool WebAssemblyFastISel::fastLowerArguments() {
default:
return false;
}
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addImm(I);
updateValueMap(&Arg, ResultReg);
@@ -887,7 +887,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
if (Subtarget->hasAddr64()) {
auto Wrap = BuildMI(*FuncInfo.MBB, std::prev(FuncInfo.InsertPt), DbgLoc,
TII.get(WebAssembly::I32_WRAP_I64));
- unsigned Reg32 = createResultReg(&WebAssembly::I32RegClass);
+ Register Reg32 = createResultReg(&WebAssembly::I32RegClass);
Wrap.addReg(Reg32, RegState::Define);
Wrap.addReg(CalleeReg);
CalleeReg = Reg32;
@@ -914,11 +914,11 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
if (CondReg == 0)
return false;
- unsigned TrueReg = getRegForValue(Select->getTrueValue());
+ Register TrueReg = getRegForValue(Select->getTrueValue());
if (TrueReg == 0)
return false;
- unsigned FalseReg = getRegForValue(Select->getFalseValue());
+ Register FalseReg = getRegForValue(Select->getFalseValue());
if (FalseReg == 0)
return false;
@@ -959,7 +959,7 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
return false;
}
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(TrueReg)
.addReg(FalseReg)
@@ -972,12 +972,12 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
bool WebAssemblyFastISel::selectTrunc(const Instruction *I) {
const auto *Trunc = cast<TruncInst>(I);
- unsigned Reg = getRegForValue(Trunc->getOperand(0));
+ Register Reg = getRegForValue(Trunc->getOperand(0));
if (Reg == 0)
return false;
if (Trunc->getOperand(0)->getType()->isIntegerTy(64)) {
- unsigned Result = createResultReg(&WebAssembly::I32RegClass);
+ Register Result = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(WebAssembly::I32_WRAP_I64), Result)
.addReg(Reg);
@@ -994,7 +994,7 @@ bool WebAssemblyFastISel::selectZExt(const Instruction *I) {
const Value *Op = ZExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
MVT::SimpleValueType To = getLegalType(getSimpleType(ZExt->getType()));
- unsigned In = getRegForValue(Op);
+ Register In = getRegForValue(Op);
if (In == 0)
return false;
unsigned Reg = zeroExtend(In, Op, From, To);
@@ -1011,7 +1011,7 @@ bool WebAssemblyFastISel::selectSExt(const Instruction *I) {
const Value *Op = SExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
MVT::SimpleValueType To = getLegalType(getSimpleType(SExt->getType()));
- unsigned In = getRegForValue(Op);
+ Register In = getRegForValue(Op);
if (In == 0)
return false;
unsigned Reg = signExtend(In, Op, From, To);
@@ -1075,7 +1075,7 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
if (RHS == 0)
return false;
- unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass);
+ Register ResultReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(LHS)
.addReg(RHS);
@@ -1086,11 +1086,11 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
bool WebAssemblyFastISel::selectFCmp(const Instruction *I) {
const auto *FCmp = cast<FCmpInst>(I);
- unsigned LHS = getRegForValue(FCmp->getOperand(0));
+ Register LHS = getRegForValue(FCmp->getOperand(0));
if (LHS == 0)
return false;
- unsigned RHS = getRegForValue(FCmp->getOperand(1));
+ Register RHS = getRegForValue(FCmp->getOperand(1));
if (RHS == 0)
return false;
@@ -1136,7 +1136,7 @@ bool WebAssemblyFastISel::selectFCmp(const Instruction *I) {
return false;
}
- unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass);
+ Register ResultReg = createResultReg(&WebAssembly::I32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(LHS)
.addReg(RHS);
@@ -1157,7 +1157,7 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
if (!VT.isSimple() || !RetVT.isSimple())
return false;
- unsigned In = getRegForValue(I->getOperand(0));
+ Register In = getRegForValue(I->getOperand(0));
if (In == 0)
return false;
@@ -1229,7 +1229,7 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
materializeLoadStoreOperands(Addr);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg);
@@ -1284,7 +1284,7 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
materializeLoadStoreOperands(Addr);
- unsigned ValueReg = getRegForValue(Store->getValueOperand());
+ Register ValueReg = getRegForValue(Store->getValueOperand());
if (ValueReg == 0)
return false;
if (VTIsi1)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 38ed4c73fb93..a221f37cfd94 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1491,8 +1491,7 @@ bool WebAssemblyTargetLowering::MatchTableForLowering(SelectionDAG &DAG,
if (GA) {
// We are in Case 2 above.
Idx = Base->getOperand(1);
- if (!Idx || GA->getNumValues() != 1 || Idx->getNumValues() != 1)
- return false;
+ assert(GA->getNumValues() == 1);
} else {
// This might be Case 1 above (or an error)
SDValue V = Base->getOperand(0);
@@ -1629,7 +1628,7 @@ SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
// local.copy between Op and its FI operand.
SDValue Chain = Op.getOperand(0);
SDLoc DL(Op);
- unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
+ Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
EVT VT = Src.getValueType();
SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
: WebAssembly::COPY_I64,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 23aaa5160abd..fe656753889f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -279,6 +279,7 @@
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
@@ -454,12 +455,12 @@ static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name,
// Tell the linker that this function is expected to be imported from the
// 'env' module.
if (!F->hasFnAttribute("wasm-import-module")) {
- llvm::AttrBuilder B;
+ llvm::AttrBuilder B(M->getContext());
B.addAttribute("wasm-import-module", "env");
F->addFnAttrs(B);
}
if (!F->hasFnAttribute("wasm-import-name")) {
- llvm::AttrBuilder B;
+ llvm::AttrBuilder B(M->getContext());
B.addAttribute("wasm-import-name", F->getName());
F->addFnAttrs(B);
}
@@ -547,7 +548,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
for (unsigned I = 0, E = CI->arg_size(); I < E; ++I)
ArgAttributes.push_back(InvokeAL.getParamAttrs(I));
- AttrBuilder FnAttrs(InvokeAL.getFnAttrs());
+ AttrBuilder FnAttrs(CI->getContext(), InvokeAL.getFnAttrs());
if (FnAttrs.contains(Attribute::AllocSize)) {
// The allocsize attribute (if any) referes to parameters by index and needs
// to be adjusted.
@@ -610,6 +611,8 @@ static bool canLongjmp(const Value *Callee) {
return false;
StringRef CalleeName = Callee->getName();
+ // TODO Include more functions or consider checking with mangled prefixes
+
// The reason we include malloc/free here is to exclude the malloc/free
// calls generated in setjmp prep / cleanup routines.
if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free")
@@ -626,11 +629,50 @@ static bool canLongjmp(const Value *Callee) {
return false;
// Exception-catching related functions
- if (CalleeName == "__cxa_begin_catch" || CalleeName == "__cxa_end_catch" ||
+ //
+ // We intentionally excluded __cxa_end_catch here even though it surely cannot
+ // longjmp, in order to maintain the unwind relationship from all existing
+ // catchpads (and calls within them) to catch.dispatch.longjmp.
+ //
+ // In Wasm EH + Wasm SjLj, we
+ // 1. Make all catchswitch and cleanuppad that unwind to caller unwind to
+ // catch.dispatch.longjmp instead
+ // 2. Convert all longjmpable calls to invokes that unwind to
+ // catch.dispatch.longjmp
+ // But catchswitch BBs are removed in isel, so if an EH catchswitch (generated
+ // from an exception)'s catchpad does not contain any calls that are converted
+ // into invokes unwinding to catch.dispatch.longjmp, this unwind relationship
+ // (EH catchswitch BB -> catch.dispatch.longjmp BB) is lost and
+ // catch.dispatch.longjmp BB can be placed before the EH catchswitch BB in
+ // CFGSort.
+ // int ret = setjmp(buf);
+ // try {
+ // foo(); // longjmps
+ // } catch (...) {
+ // }
+ // Then in this code, if 'foo' longjmps, it first unwinds to 'catch (...)'
+ // catchswitch, and is not caught by that catchswitch because it is a longjmp,
+ // then it should next unwind to catch.dispatch.longjmp BB. But if this 'catch
+ // (...)' catchswitch -> catch.dispatch.longjmp unwind relationship is lost,
+ // it will not unwind to catch.dispatch.longjmp, producing an incorrect
+ // result.
+ //
+ // Every catchpad generated by Wasm C++ contains __cxa_end_catch, so we
+ // intentionally treat it as longjmpable to work around this problem. This is
+ // a hacky fix but an easy one.
+ //
+ // The comment block in findWasmUnwindDestinations() in
+ // SelectionDAGBuilder.cpp is addressing a similar problem.
+ if (CalleeName == "__cxa_begin_catch" ||
CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" ||
CalleeName == "__clang_call_terminate")
return false;
+ // std::terminate, which is generated when another exception occurs while
+ // handling an exception, cannot longjmp.
+ if (CalleeName == "_ZSt9terminatev")
+ return false;
+
// Otherwise we don't know
return true;
}
@@ -817,6 +859,32 @@ static bool containsLongjmpableCalls(const Function *F) {
return false;
}
+// When a function contains a setjmp call but not other calls that can longjmp,
+// we don't do setjmp transformation for that setjmp. But we need to convert the
+// setjmp calls into "i32 0" so they don't cause link time errors. setjmp always
+// returns 0 when called directly.
+static void nullifySetjmp(Function *F) {
+ Module &M = *F->getParent();
+ IRBuilder<> IRB(M.getContext());
+ Function *SetjmpF = M.getFunction("setjmp");
+ SmallVector<Instruction *, 1> ToErase;
+
+ for (User *U : SetjmpF->users()) {
+ auto *CI = dyn_cast<CallInst>(U);
+ // FIXME 'invoke' to setjmp can happen when we use Wasm EH + Wasm SjLj, but
+ // we don't support two being used together yet.
+ if (!CI)
+ report_fatal_error("Wasm EH + Wasm SjLj is not fully supported yet");
+ BasicBlock *BB = CI->getParent();
+ if (BB->getParent() != F) // in other function
+ continue;
+ ToErase.push_back(CI);
+ CI->replaceAllUsesWith(IRB.getInt32(0));
+ }
+ for (auto *I : ToErase)
+ I->eraseFromParent();
+}
+
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
@@ -886,6 +954,10 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
EHTypeIDF = getEmscriptenFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M);
}
+ // Functions that contains calls to setjmp but don't have other longjmpable
+ // calls within them.
+ SmallPtrSet<Function *, 4> SetjmpUsersToNullify;
+
if ((EnableEmSjLj || EnableWasmSjLj) && SetjmpF) {
// Precompute setjmp users
for (User *U : SetjmpF->users()) {
@@ -896,6 +968,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
// so can ignore it
if (containsLongjmpableCalls(UserF))
SetjmpUsers.insert(UserF);
+ else
+ SetjmpUsersToNullify.insert(UserF);
} else {
std::string S;
raw_string_ostream SS(S);
@@ -975,6 +1049,14 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
runSjLjOnFunction(*F);
}
+ // Replace unnecessary setjmp calls with 0
+ if ((EnableEmSjLj || EnableWasmSjLj) && !SetjmpUsersToNullify.empty()) {
+ Changed = true;
+ assert(SetjmpF);
+ for (Function *F : SetjmpUsersToNullify)
+ nullifySetjmp(F);
+ }
+
if (!Changed) {
// Delete unused global variables and functions
if (ResumeF)
@@ -1078,20 +1160,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
} else {
// This can't throw, and we don't need this invoke, just replace it with a
// call+branch
- SmallVector<Value *, 16> Args(II->args());
- CallInst *NewCall =
- IRB.CreateCall(II->getFunctionType(), II->getCalledOperand(), Args);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setDebugLoc(II->getDebugLoc());
- NewCall->setAttributes(II->getAttributes());
- II->replaceAllUsesWith(NewCall);
- ToErase.push_back(II);
-
- IRB.CreateBr(II->getNormalDest());
-
- // Remove any PHI node entries from the exception destination
- II->getUnwindDest()->removePredecessor(&BB);
+ changeToCall(II);
}
}
@@ -1243,16 +1312,19 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// Setjmp transformation
SmallVector<PHINode *, 4> SetjmpRetPHIs;
Function *SetjmpF = M.getFunction("setjmp");
- for (User *U : SetjmpF->users()) {
- auto *CI = dyn_cast<CallInst>(U);
- // FIXME 'invoke' to setjmp can happen when we use Wasm EH + Wasm SjLj, but
- // we don't support two being used together yet.
- if (!CI)
- report_fatal_error("Wasm EH + Wasm SjLj is not fully supported yet");
- BasicBlock *BB = CI->getParent();
+ for (auto *U : make_early_inc_range(SetjmpF->users())) {
+ auto *CB = dyn_cast<CallBase>(U);
+ BasicBlock *BB = CB->getParent();
if (BB->getParent() != &F) // in other function
continue;
+ CallInst *CI = nullptr;
+ // setjmp cannot throw. So if it is an invoke, lower it to a call
+ if (auto *II = dyn_cast<InvokeInst>(CB))
+ CI = llvm::changeToCall(II);
+ else
+ CI = cast<CallInst>(CB);
+
// The tail is everything right after the call, and will be reached once
// when setjmp is called, and later when longjmp returns to the setjmp
BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
@@ -1568,6 +1640,13 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj(
I->eraseFromParent();
}
+static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CPI) {
+ for (const User *U : CPI->users())
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+ return CRI->getUnwindDest();
+ return nullptr;
+}
+
// Create a catchpad in which we catch a longjmp's env and val arguments, test
// if the longjmp corresponds to one of setjmps in the current function, and if
// so, jump to the setjmp dispatch BB from which we go to one of post-setjmp
@@ -1619,18 +1698,18 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
BasicBlock::Create(C, "setjmp.dispatch", &F, OrigEntry);
cast<BranchInst>(Entry->getTerminator())->setSuccessor(0, SetjmpDispatchBB);
- // Create catch.dispatch.longjmp BB a catchswitch instruction
- BasicBlock *CatchSwitchBB =
+ // Create catch.dispatch.longjmp BB and a catchswitch instruction
+ BasicBlock *CatchDispatchLongjmpBB =
BasicBlock::Create(C, "catch.dispatch.longjmp", &F);
- IRB.SetInsertPoint(CatchSwitchBB);
- CatchSwitchInst *CatchSwitch =
+ IRB.SetInsertPoint(CatchDispatchLongjmpBB);
+ CatchSwitchInst *CatchSwitchLongjmp =
IRB.CreateCatchSwitch(ConstantTokenNone::get(C), nullptr, 1);
// Create catch.longjmp BB and a catchpad instruction
BasicBlock *CatchLongjmpBB = BasicBlock::Create(C, "catch.longjmp", &F);
- CatchSwitch->addHandler(CatchLongjmpBB);
+ CatchSwitchLongjmp->addHandler(CatchLongjmpBB);
IRB.SetInsertPoint(CatchLongjmpBB);
- CatchPadInst *CatchPad = IRB.CreateCatchPad(CatchSwitch, {});
+ CatchPadInst *CatchPad = IRB.CreateCatchPad(CatchSwitchLongjmp, {});
// Wasm throw and catch instructions can throw and catch multiple values, but
// that requires multivalue support in the toolchain, which is currently not
@@ -1696,9 +1775,9 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
// Convert all longjmpable call instructions to invokes that unwind to the
// newly created catch.dispatch.longjmp BB.
- SmallVector<Instruction *, 64> ToErase;
+ SmallVector<CallInst *, 64> LongjmpableCalls;
for (auto *BB = &*F.begin(); BB; BB = BB->getNextNode()) {
- for (Instruction &I : *BB) {
+ for (auto &I : *BB) {
auto *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
@@ -1716,29 +1795,66 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
// setjmps in this function. We should not convert this call to an invoke.
if (CI == WasmLongjmpCI)
continue;
- ToErase.push_back(CI);
+ LongjmpableCalls.push_back(CI);
+ }
+ }
- // Even if the callee function has attribute 'nounwind', which is true for
- // all C functions, it can longjmp, which means it can throw a Wasm
- // exception now.
- CI->removeFnAttr(Attribute::NoUnwind);
- if (Function *CalleeF = CI->getCalledFunction()) {
- CalleeF->removeFnAttr(Attribute::NoUnwind);
+ for (auto *CI : LongjmpableCalls) {
+ // Even if the callee function has attribute 'nounwind', which is true for
+ // all C functions, it can longjmp, which means it can throw a Wasm
+ // exception now.
+ CI->removeFnAttr(Attribute::NoUnwind);
+ if (Function *CalleeF = CI->getCalledFunction())
+ CalleeF->removeFnAttr(Attribute::NoUnwind);
+
+ // Change it to an invoke and make it unwind to the catch.dispatch.longjmp
+ // BB. If the call is enclosed in another catchpad/cleanuppad scope, unwind
+ // to its parent pad's unwind destination instead to preserve the scope
+ // structure. It will eventually unwind to the catch.dispatch.longjmp.
+ SmallVector<OperandBundleDef, 1> Bundles;
+ BasicBlock *UnwindDest = nullptr;
+ if (auto Bundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
+ Instruction *FromPad = cast<Instruction>(Bundle->Inputs[0]);
+ while (!UnwindDest && FromPad) {
+ if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) {
+ UnwindDest = CPI->getCatchSwitch()->getUnwindDest();
+ FromPad = nullptr; // stop searching
+ } else if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) {
+ // getCleanupRetUnwindDest() can return nullptr when
+ // 1. This cleanuppad's matching cleanupret unwinds to caller
+ // 2. There is no matching cleanupret because it ends with
+ // unreachable.
+ // In case of 2, we need to traverse the parent pad chain.
+ UnwindDest = getCleanupRetUnwindDest(CPI);
+ FromPad = cast<Instruction>(CPI->getParentPad());
+ }
}
+ }
+ if (!UnwindDest)
+ UnwindDest = CatchDispatchLongjmpBB;
+ changeToInvokeAndSplitBasicBlock(CI, UnwindDest);
+ }
- IRB.SetInsertPoint(CI);
- BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
- // We will add a new invoke. So remove the branch created when we split
- // the BB
- ToErase.push_back(BB->getTerminator());
- SmallVector<Value *, 8> Args(CI->args());
- InvokeInst *II =
- IRB.CreateInvoke(CI->getFunctionType(), CI->getCalledOperand(), Tail,
- CatchSwitchBB, Args);
- II->takeName(CI);
- II->setDebugLoc(CI->getDebugLoc());
- II->setAttributes(CI->getAttributes());
- CI->replaceAllUsesWith(II);
+ SmallVector<Instruction *, 16> ToErase;
+ for (auto &BB : F) {
+ if (auto *CSI = dyn_cast<CatchSwitchInst>(BB.getFirstNonPHI())) {
+ if (CSI != CatchSwitchLongjmp && CSI->unwindsToCaller()) {
+ IRB.SetInsertPoint(CSI);
+ ToErase.push_back(CSI);
+ auto *NewCSI = IRB.CreateCatchSwitch(CSI->getParentPad(),
+ CatchDispatchLongjmpBB, 1);
+ NewCSI->addHandler(*CSI->handler_begin());
+ NewCSI->takeName(CSI);
+ CSI->replaceAllUsesWith(NewCSI);
+ }
+ }
+
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(BB.getTerminator())) {
+ if (CRI->unwindsToCaller()) {
+ IRB.SetInsertPoint(CRI);
+ ToErase.push_back(CRI);
+ IRB.CreateCleanupRet(CRI->getCleanupPad(), CatchDispatchLongjmpBB);
+ }
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index 3a0bef8c765c..ca6f3f194645 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <map>
+
using namespace llvm;
#define DEBUG_TYPE "wasm-lower-global-dtors"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 9d83a75a8247..6a6cac6d956f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -82,7 +82,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
// Split multiple-VN LiveIntervals into multiple LiveIntervals.
SmallVector<LiveInterval *, 4> SplitLIs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
if (MRI.reg_nodbg_empty(Reg))
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 8b8593ddcbdd..5682cadc1a64 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -95,7 +95,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
// TODO: This is fairly heavy-handed; find a better approach.
//
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
// Skip unused registers.
if (MRI.use_nodbg_empty(Reg))
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index fe127dec8aed..5252db4858b9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -98,7 +98,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Interesting register intervals:\n");
for (unsigned I = 0; I < NumVRegs; ++I) {
- unsigned VReg = Register::index2VirtReg(I);
+ Register VReg = Register::index2VirtReg(I);
if (MFI.isVRegStackified(VReg))
continue;
// Skip unused registers, which can use $drop.
@@ -135,7 +135,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
LiveInterval *LI = SortedIntervals[I];
- unsigned Old = LI->reg();
+ Register Old = LI->reg();
size_t Color = I;
const TargetRegisterClass *RC = MRI->getRegClass(Old);
@@ -152,7 +152,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
continue_outer:;
}
- unsigned New = SortedIntervals[Color]->reg();
+ Register New = SortedIntervals[Color]->reg();
SlotMapping[I] = New;
Changed |= Old != New;
UsedColors.set(Color);
@@ -168,7 +168,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
// Rewrite register operands.
for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
- unsigned Old = SortedIntervals[I]->reg();
+ Register Old = SortedIntervals[I]->reg();
unsigned New = SlotMapping[I];
if (Old != New)
MRI->replaceRegWith(Old, New);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index c73b8a29daeb..76c78cd23130 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -89,7 +89,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
// Start the numbering for locals after the arg regs
unsigned CurReg = MFI.getParams().size();
for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) {
- unsigned VReg = Register::index2VirtReg(VRegIdx);
+ Register VReg = Register::index2VirtReg(VRegIdx);
// Skip unused registers.
if (MRI.use_empty(VReg))
continue;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 42419259802e..d3ad47147ac8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -909,8 +909,8 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
SubsequentUse != Use.getParent()->uses().end()) {
if (!SubsequentDef->isReg() || !SubsequentUse->isReg())
break;
- unsigned DefReg = SubsequentDef->getReg();
- unsigned UseReg = SubsequentUse->getReg();
+ Register DefReg = SubsequentDef->getReg();
+ Register UseReg = SubsequentUse->getReg();
// TODO: This single-use restriction could be relaxed by using tees
if (DefReg != UseReg || !MRI.hasOneUse(DefReg))
break;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index add3c799f4aa..912f61765579 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -42,8 +42,7 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
const std::string &FS,
const TargetMachine &TM)
: WebAssemblyGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
- TargetTriple(TT), FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TSInfo(),
+ TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableAtomicExpand() const {
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 2ba0b97229cc..e9ecff3bf514 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -216,7 +216,7 @@ private:
// The operator on the top of the stack has higher precedence than the
// new operator.
unsigned ParenCount = 0;
- while (1) {
+ while (true) {
// Nothing to process.
if (InfixOperatorStack.empty())
break;
@@ -3030,7 +3030,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
ForcedDispEncoding = DispEncoding_Default;
// Parse pseudo prefixes.
- while (1) {
+ while (true) {
if (Name == "{") {
if (getLexer().isNot(AsmToken::Identifier))
return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
@@ -3370,7 +3370,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
// Read the operands.
- while(1) {
+ while (true) {
if (ParseOperand(Operands))
return true;
if (HandleAVX512Operand(Operands))
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
new file mode 100644
index 000000000000..78379290aae9
--- /dev/null
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
@@ -0,0 +1,64 @@
+//===------------------- X86CustomBehaviour.cpp -----------------*-C++ -* -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods from the X86CustomBehaviour class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86CustomBehaviour.h"
+#include "TargetInfo/X86TargetInfo.h"
+#include "X86InstrInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/WithColor.h"
+
+namespace llvm {
+namespace mca {
+
+void X86InstrPostProcess::setMemBarriers(std::unique_ptr<Instruction> &Inst,
+ const MCInst &MCI) {
+ switch (MCI.getOpcode()) {
+ case X86::MFENCE:
+ Inst->setLoadBarrier(true);
+ Inst->setStoreBarrier(true);
+ break;
+ case X86::LFENCE:
+ Inst->setLoadBarrier(true);
+ break;
+ case X86::SFENCE:
+ Inst->setStoreBarrier(true);
+ break;
+ }
+}
+
+void X86InstrPostProcess::postProcessInstruction(
+ std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
+ // Currently, we only modify certain instructions' IsALoadBarrier and
+ // IsAStoreBarrier flags.
+ setMemBarriers(Inst, MCI);
+}
+
+} // namespace mca
+} // namespace llvm
+
+using namespace llvm;
+using namespace mca;
+
+static InstrPostProcess *createX86InstrPostProcess(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII) {
+ return new X86InstrPostProcess(STI, MCII);
+}
+
+/// Extern function to initialize the targets for the X86 backend
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86TargetMCA() {
+ TargetRegistry::RegisterInstrPostProcess(getTheX86_32Target(),
+ createX86InstrPostProcess);
+ TargetRegistry::RegisterInstrPostProcess(getTheX86_64Target(),
+ createX86InstrPostProcess);
+}
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
new file mode 100644
index 000000000000..24d26751f0a1
--- /dev/null
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
@@ -0,0 +1,47 @@
+//===-------------------- X86CustomBehaviour.h ------------------*-C++ -* -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the X86InstrPostProcess class, which inherits from
+/// InstrPostProcess. It is used by the llvm-mca tool to enforce
+/// target-specific behaviour that is not expressed well enough in the
+/// scheduling model for mca to enforce automatically.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
+#define LLVM_LIB_TARGET_X86_MCA_X86CUSTOMBEHAVIOUR_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/Support/TargetParser.h"
+
+namespace llvm {
+namespace mca {
+
+class X86InstrPostProcess : public InstrPostProcess {
+ void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
+
+ /// Called within X86InstrPostProcess to specify certain instructions
+ /// as load and store barriers.
+ void setMemBarriers(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
+
+public:
+ X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
+ : InstrPostProcess(STI, MCII) {}
+
+ ~X86InstrPostProcess() {}
+
+ void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
+ const MCInst &MCI) override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index bb12ede3b729..fd82bdcd1a23 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -40,4 +40,4 @@ protected:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86ATTINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTPRINTERCOMMON_H
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 9da0a8129f23..8913e405539e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -111,6 +111,15 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
{codeview::RegisterId::EFLAGS, X86::EFLAGS},
+ {codeview::RegisterId::ST0, X86::ST0},
+ {codeview::RegisterId::ST1, X86::ST1},
+ {codeview::RegisterId::ST2, X86::ST2},
+ {codeview::RegisterId::ST3, X86::ST3},
+ {codeview::RegisterId::ST4, X86::ST4},
+ {codeview::RegisterId::ST5, X86::ST5},
+ {codeview::RegisterId::ST6, X86::ST6},
+ {codeview::RegisterId::ST7, X86::ST7},
+
{codeview::RegisterId::ST0, X86::FP0},
{codeview::RegisterId::ST1, X86::FP1},
{codeview::RegisterId::ST2, X86::FP2},
@@ -281,8 +290,8 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
{codeview::RegisterId::AMD64_XMM31, X86::XMM31},
};
- for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
- MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
+ for (const auto &I : RegMap)
+ MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg));
}
MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h
index b22f25af26cf..94679e6e3d11 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -23,7 +23,6 @@ class MCCodeEmitter;
class MCStreamer;
class X86Subtarget;
class TargetMachine;
-struct ASanAccessInfo;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget = nullptr;
diff --git a/llvm/lib/Target/X86/X86CallLowering.h b/llvm/lib/Target/X86/X86CallLowering.h
index ac5b92bf4aae..0ad67cfd3532 100644
--- a/llvm/lib/Target/X86/X86CallLowering.h
+++ b/llvm/lib/Target/X86/X86CallLowering.h
@@ -20,8 +20,6 @@
namespace llvm {
template <typename T> class ArrayRef;
-class DataLayout;
-class MachineRegisterInfo;
class X86TargetLowering;
class X86CallLowering : public CallLowering {
diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 47874e82ff3b..061fff50bcea 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -56,8 +56,6 @@ public:
bool isTileLoad(MachineInstr &MI);
bool isTileStore(MachineInstr &MI);
bool isAMXInstr(MachineInstr &MI);
- void getTileStoreShape(MachineInstr &MI,
- SmallVector<MachineOperand *> &ShapedTiles);
MachineInstr *getKeyAMXInstr(MachineInstr *MI);
void getTileShapesCfg(MachineInstr *MI,
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 0a7aea467809..51f2ced321bb 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -149,6 +149,17 @@ static unsigned getLEArOpcode(bool IsLP64) {
return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
+static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
+ if (Use64BitReg) {
+ if (isUInt<32>(Imm))
+ return X86::MOV32ri64;
+ if (isInt<32>(Imm))
+ return X86::MOV64ri32;
+ return X86::MOV64ri;
+ }
+ return X86::MOV32ri;
+}
+
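For reference, a standalone sketch of the immediate-width selection that getMOVriOpcode() above performs; pickMovImm and the MovImm enum are hypothetical names standing in for the real X86:: opcode values:

    #include <cstdint>
    #include <limits>
    enum class MovImm { MOV32ri, MOV32ri64, MOV64ri32, MOV64ri };
    MovImm pickMovImm(bool Use64BitReg, int64_t Imm) {
      if (!Use64BitReg)
        return MovImm::MOV32ri; // 32-bit destination register
      if (Imm >= 0 && Imm <= std::numeric_limits<uint32_t>::max())
        return MovImm::MOV32ri64; // zero-extends the 32-bit immediate
      if (Imm >= std::numeric_limits<int32_t>::min() &&
          Imm <= std::numeric_limits<int32_t>::max())
        return MovImm::MOV64ri32; // sign-extends the 32-bit immediate
      return MovImm::MOV64ri; // full 64-bit immediate
    }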
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
unsigned Reg = RegMask.PhysReg;
@@ -237,11 +248,10 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
else
Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);
- unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
unsigned AddSubRROpc =
isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
if (Reg) {
- BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg)
+ BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
.addImm(Offset)
.setMIFlag(Flag);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
@@ -267,7 +277,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
Offset = -(Offset - SlotSize);
else
Offset = Offset + SlotSize;
- BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax)
+ BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
.addImm(Offset)
.setMIFlag(Flag);
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
@@ -434,7 +444,7 @@ void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
-void X86FrameLowering::emitCalleeSavedFrameMoves(
+void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
if (!hasFP(MF)) {
@@ -469,7 +479,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
// Calculate offsets.
for (const CalleeSavedInfo &I : CSI) {
int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
if (IsPrologue) {
@@ -637,6 +647,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
uint64_t AlignOffset) const {
assert(Offset && "null offset");
+ const bool NeedsDwarfCFI = needsDwarfCFI(MF);
+ const bool HasFP = hasFP(MF);
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86TargetLowering &TLI = *STI.getTargetLowering();
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
@@ -676,17 +688,36 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
: Is64Bit ? X86::R11D
: X86::EAX;
+
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
// save loop bound
{
- const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
+ const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
.addReg(FinalStackProbed)
- .addImm(Offset / StackProbeSize * StackProbeSize)
+ .addImm(BoundOffset)
.setMIFlag(MachineInstr::FrameSetup);
+
+ // while in the loop, use loop-invariant reg for CFI,
+ // instead of the stack pointer, which changes during the loop
+ if (!HasFP && NeedsDwarfCFI) {
+ // x32 uses the same DWARF register numbers as x86-64, so there isn't a
+ // register number for r11d; we must use r11 instead.
+ const Register DwarfFinalStackProbed =
+ STI.isTarget64BitILP32()
+ ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
+ : FinalStackProbed;
+
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaRegister(
+ nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
+ }
}
// allocate a page
@@ -725,15 +756,30 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MBB.addSuccessor(testMBB);
// handle tail
- unsigned TailOffset = Offset % StackProbeSize;
+ const unsigned TailOffset = Offset % StackProbeSize;
+ MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
if (TailOffset) {
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset);
- BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr)
+ BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(TailOffset)
.setMIFlag(MachineInstr::FrameSetup);
}
+ // after the loop, switch back to stack pointer for CFI
+ if (!HasFP && NeedsDwarfCFI) {
+ // x32 uses the same DWARF register numbers as x86-64, so there isn't a
+ // register number for esp; we must use rsp instead.
+ const Register DwarfStackPtr =
+ STI.isTarget64BitILP32()
+ ? Register(getX86SubSuperRegister(StackPtr, 64))
+ : Register(StackPtr);
+
+ BuildCFI(*tailMBB, TailMBBIter, DL,
+ MCCFIInstruction::createDefCfaRegister(
+ nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
+ }
+
// Update Live In information
recomputeLiveIns(*testMBB);
recomputeLiveIns(*tailMBB);
@@ -1705,19 +1751,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Handle the 64-bit Windows ABI case where we need to call __chkstk.
// Function prologue is responsible for adjusting the stack pointer.
int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
- if (isUInt<32>(Alloc)) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(Alloc)
- .setMIFlag(MachineInstr::FrameSetup);
- } else if (isInt<32>(Alloc)) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
- .addImm(Alloc)
- .setMIFlag(MachineInstr::FrameSetup);
- } else {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
- .addImm(Alloc)
- .setMIFlag(MachineInstr::FrameSetup);
- }
+ BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
+ .addImm(Alloc)
+ .setMIFlag(MachineInstr::FrameSetup);
} else {
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
// We'll also use 4 already allocated bytes for EAX.
@@ -2497,7 +2533,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// Assign slots for GPRs. It increases frame size.
for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
@@ -2514,7 +2550,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// Assign slots for XMMs.
for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
@@ -2560,7 +2596,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
const MachineFunction &MF = *MBB.getParent();
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
@@ -2594,7 +2630,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
// Make XMM regs spilled. X86 does not have ability of push/pop XMM.
// It can be done by spilling XMMs to stack frame.
for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
continue;
@@ -2672,7 +2708,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
// Reload XMMs from stack frame.
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (X86::GR64RegClass.contains(Reg) ||
X86::GR32RegClass.contains(Reg))
continue;
@@ -2689,7 +2725,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
// POP GPRs.
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
if (!X86::GR64RegClass.contains(Reg) &&
!X86::GR32RegClass.contains(Reg))
continue;
@@ -2944,15 +2980,16 @@ void X86FrameLowering::adjustForSegmentedStacks(
const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
- const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
if (IsNested)
BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
- BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
- .addImm(StackSize);
- BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
- .addImm(X86FI->getArgumentStackSize());
+ BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
+ .addImm(StackSize);
+ BuildMI(allocMBB, DL,
+ TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
+ Reg11)
+ .addImm(X86FI->getArgumentStackSize());
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index e18be0d26321..987facbfeae4 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -65,9 +65,8 @@ public:
void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const override;
- void
- emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const override;
+ void emitCalleeSavedFrameMovesFullCFA(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override;
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7ed05fd0331d..5b90c67deae6 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -80,9 +80,9 @@ namespace {
bool NegateIndex = false;
X86ISelAddressMode()
- : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
- Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
- MCSym(nullptr), JT(-1), SymbolFlags(X86II::MO_NO_FLAG) {}
+ : BaseType(RegBase), Base_FrameIndex(0), Scale(1), Disp(0), GV(nullptr),
+ CP(nullptr), BlockAddr(nullptr), ES(nullptr), MCSym(nullptr), JT(-1),
+ SymbolFlags(X86II::MO_NO_FLAG) {}
bool hasSymbolicDisplacement() const {
return GV != nullptr || CP != nullptr || ES != nullptr ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6f6361b6757b..aff72452af6c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1096,6 +1096,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Custom);
}
+ setOperationAction(ISD::FSHL, MVT::v16i8, Custom);
+ setOperationAction(ISD::FSHR, MVT::v16i8, Custom);
+ setOperationAction(ISD::FSHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::v4i32, Custom);
+
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
@@ -1284,6 +1289,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Custom);
}
+ setOperationAction(ISD::FSHL, MVT::v32i8, Custom);
+ setOperationAction(ISD::FSHR, MVT::v32i8, Custom);
+ setOperationAction(ISD::FSHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::v8i32, Custom);
+
// These types need custom splitting if their input is a 128-bit vector.
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
@@ -1688,6 +1698,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
}
+ setOperationAction(ISD::FSHL, MVT::v64i8, Custom);
+ setOperationAction(ISD::FSHR, MVT::v64i8, Custom);
+ setOperationAction(ISD::FSHL, MVT::v16i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::v16i32, Custom);
+
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
@@ -5475,10 +5490,9 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
- for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
- if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
+ for (const APFloat &FPImm : LegalFPImmediates)
+ if (Imm.bitwiseIsEqual(FPImm))
return true;
- }
return false;
}
@@ -6132,6 +6146,29 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
return DAG.getBitcast(VT, Vec);
}
+// Helper to determine if the ops are all extracted subvectors that come from a
+// single source. If we allow commute they don't have to be in order (Lo/Hi).
+static SDValue getSplitVectorSrc(SDValue LHS, SDValue RHS, bool AllowCommute) {
+ if (LHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ RHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ LHS.getValueType() != RHS.getValueType() ||
+ LHS.getOperand(0) != RHS.getOperand(0))
+ return SDValue();
+
+ SDValue Src = LHS.getOperand(0);
+ if (Src.getValueSizeInBits() != (LHS.getValueSizeInBits() * 2))
+ return SDValue();
+
+ unsigned NumElts = LHS.getValueType().getVectorNumElements();
+ if ((LHS.getConstantOperandAPInt(1) == 0 &&
+ RHS.getConstantOperandAPInt(1) == NumElts) ||
+ (AllowCommute && RHS.getConstantOperandAPInt(1) == 0 &&
+ LHS.getConstantOperandAPInt(1) == NumElts))
+ return Src;
+
+ return SDValue();
+}
+
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
const SDLoc &dl, unsigned vectorWidth) {
EVT VT = Vec.getValueType();
@@ -6850,8 +6887,8 @@ static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
DAG.computeKnownBits(RHS).countMaxActiveBits() <= EltSizeInBits)
return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);
- if (DAG.ComputeMinSignedBits(LHS) <= EltSizeInBits &&
- DAG.ComputeMinSignedBits(RHS) <= EltSizeInBits)
+ if (DAG.ComputeMaxSignificantBits(LHS) <= EltSizeInBits &&
+ DAG.ComputeMaxSignificantBits(RHS) <= EltSizeInBits)
return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);
}
@@ -7907,6 +7944,7 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
+// TODO: Merge into getTargetShuffleInputs()
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
@@ -8355,6 +8393,9 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
APInt &KnownUndef, APInt &KnownZero,
const SelectionDAG &DAG, unsigned Depth,
bool ResolveKnownElts) {
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return false; // Limit search depth.
+
EVT VT = Op.getValueType();
if (!VT.isSimple() || !VT.isVector())
return false;
@@ -9233,8 +9274,13 @@ static bool isFoldableUseOfShuffle(SDNode *N) {
return true;
if (Opc == ISD::BITCAST) // Ignore bitcasts
return isFoldableUseOfShuffle(U);
- if (N->hasOneUse())
+ if (N->hasOneUse()) {
+ // TODO, there may be some general way to know if a SDNode can
+ // be folded. We now only know whether an MI is foldable.
+ if (Opc == X86ISD::VPDPBUSD && U->getOperand(2).getNode() != N)
+ return false;
return true;
+ }
}
return false;
}
@@ -10055,13 +10101,18 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
if (IsSubAdd)
return SDValue();
- // Do not generate X86ISD::ADDSUB node for 512-bit types even though
- // the ADDSUB idiom has been successfully recognized. There are no known
- // X86 targets with 512-bit ADDSUB instructions!
- // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
- // recognition.
- if (VT.is512BitVector())
- return SDValue();
+ // There are no known X86 targets with 512-bit ADDSUB instructions!
+ // Convert to blend(fsub,fadd).
+ if (VT.is512BitVector()) {
+ SmallVector<int> Mask;
+ for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) {
+ Mask.push_back(I);
+ Mask.push_back(I + E + 1);
+ }
+ SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1);
+ return DAG.getVectorShuffle(VT, DL, Sub, Add, Mask);
+ }
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
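As a worked example of the 512-bit branch above: the shuffle mask interleaves the FSUB result (even output lanes) with the FADD result (odd output lanes, offset by NumElts in shuffle-mask numbering). A small sketch that prints the mask for an 8-element vector such as v8f64 (illustrative only):

    #include <cstdio>
    int main() {
      const int NumElts = 8; // e.g. v8f64
      int Mask[NumElts];
      int N = 0;
      for (int I = 0; I != NumElts; I += 2) {
        Mask[N++] = I;               // Sub[I] fills the even output lane
        Mask[N++] = I + NumElts + 1; // Add[I+1] fills the odd output lane
      }
      for (int M : Mask)
        std::printf("%d ", M); // prints: 0 9 2 11 4 13 6 15
      std::printf("\n");
      return 0;
    }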
@@ -12162,12 +12213,13 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
return SDValue();
}
-/// Check whether a compaction lowering can be done by dropping even
-/// elements and compute how many times even elements must be dropped.
+/// Check whether a compaction lowering can be done by dropping even/odd
+/// elements and compute how many times even/odd elements must be dropped.
///
/// This handles shuffles which take every Nth element where N is a power of
/// two. Example shuffle masks:
///
+/// (even)
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14
/// N = 1: 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
/// N = 2: 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12, 0, 4, 8, 12
@@ -12175,16 +12227,20 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
/// N = 3: 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8
/// N = 3: 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24
///
+/// (odd)
+/// N = 1: 1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14
+/// N = 1: 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+///
/// Any of these lanes can of course be undef.
///
/// This routine only supports N <= 3.
/// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here
/// for larger N.
///
-/// \returns N above, or the number of times even elements must be dropped if
-/// there is such a number. Otherwise returns zero.
-static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
- bool IsSingleInput) {
+/// \returns N above, or the number of times even/odd elements must be dropped
+/// if there is such a number. Otherwise returns zero.
+static int canLowerByDroppingElements(ArrayRef<int> Mask, bool MatchEven,
+ bool IsSingleInput) {
// The modulus for the shuffle vector entries is based on whether this is
// a single input or not.
int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2);
@@ -12192,6 +12248,7 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
"We should only be called with masks with a power-of-2 size!");
uint64_t ModMask = (uint64_t)ShuffleModulus - 1;
+ int Offset = MatchEven ? 0 : 1;
// We track whether the input is viable for all power-of-2 strides 2^1, 2^2,
// and 2^3 simultaneously. This is because we may have ambiguity with
@@ -12210,7 +12267,7 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
uint64_t N = j + 1;
// The shuffle mask must be equal to (i * 2^N) % M.
- if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask))
+ if ((uint64_t)(Mask[i] - Offset) == (((uint64_t)i << N) & ModMask))
IsAnyViable = true;
else
ViableForN[j] = false;
@@ -15724,7 +15781,7 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Attempt to lower using compaction, SSE41 is necessary for PACKUSDW.
// We could use SIGN_EXTEND_INREG+PACKSSDW for older targets but this seems to
// be slower than a PSHUFLW+PSHUFHW+PSHUFD chain.
- int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false);
+ int NumEvenDrops = canLowerByDroppingElements(Mask, true, false);
if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
!Subtarget.hasVLX()) {
// Check if this is part of a 256-bit vector truncation.
@@ -15758,6 +15815,20 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
}
+ // When compacting odd (upper) elements, use PACKSS pre-SSE41.
+ int NumOddDrops = canLowerByDroppingElements(Mask, false, false);
+ if (NumOddDrops == 1) {
+ bool HasSSE41 = Subtarget.hasSSE41();
+ V1 = DAG.getNode(HasSSE41 ? X86ISD::VSRLI : X86ISD::VSRAI, DL, MVT::v4i32,
+ DAG.getBitcast(MVT::v4i32, V1),
+ DAG.getTargetConstant(16, DL, MVT::i8));
+ V2 = DAG.getNode(HasSSE41 ? X86ISD::VSRLI : X86ISD::VSRAI, DL, MVT::v4i32,
+ DAG.getBitcast(MVT::v4i32, V2),
+ DAG.getTargetConstant(16, DL, MVT::i8));
+ return DAG.getNode(HasSSE41 ? X86ISD::PACKUS : X86ISD::PACKSS, DL,
+ MVT::v8i16, V1, V2);
+ }
+
// Try to lower by permuting the inputs into an unpack instruction.
if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
Mask, Subtarget, DAG))
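A scalar model of the odd-element compaction added above (a sketch, per 32-bit lane; oddElt is an illustrative name): shifting the lane right by 16 moves the odd i16 element into the low half, and the saturating pack (PACKUSDW after a logical shift on SSE4.1+, PACKSSDW after an arithmetic shift on older targets) narrows the lane back to i16 without changing the value, since the shifted lane already fits the element range:

    #include <cstdint>
    // Per-lane effect of VSRLI/VSRAI by 16 followed by PACKUS/PACKSS.
    uint16_t oddElt(uint32_t Lane) { return uint16_t(Lane >> 16); }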
@@ -16024,7 +16095,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Check for compaction patterns.
bool IsSingleInput = V2.isUndef();
- int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput);
+ int NumEvenDrops = canLowerByDroppingElements(Mask, true, IsSingleInput);
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
@@ -16135,6 +16206,19 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Result;
}
+ int NumOddDrops = canLowerByDroppingElements(Mask, false, IsSingleInput);
+ if (NumOddDrops == 1) {
+ V1 = DAG.getNode(X86ISD::VSRLI, DL, MVT::v8i16,
+ DAG.getBitcast(MVT::v8i16, V1),
+ DAG.getTargetConstant(8, DL, MVT::i8));
+ if (!IsSingleInput)
+ V2 = DAG.getNode(X86ISD::VSRLI, DL, MVT::v8i16,
+ DAG.getBitcast(MVT::v8i16, V2),
+ DAG.getTargetConstant(8, DL, MVT::i8));
+ return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1,
+ IsSingleInput ? V1 : V2);
+ }
+
// Handle multi-input cases by blending/unpacking single-input shuffles.
if (NumV2Elements > 0)
return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v16i8, V1, V2, Mask,
@@ -16538,20 +16622,19 @@ static SDValue lowerShuffleAsLanePermuteAndShuffle(
// If there are only inputs from one 128-bit lane, splitting will in fact be
// less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
+ bool AllLanes;
if (!Subtarget.hasAVX2()) {
bool LaneCrossing[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
- if (!LaneCrossing[0] || !LaneCrossing[1])
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+ AllLanes = LaneCrossing[0] && LaneCrossing[1];
} else {
bool LaneUsed[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
LaneUsed[(Mask[i] % Size) / LaneSize] = true;
- if (!LaneUsed[0] || !LaneUsed[1])
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+ AllLanes = LaneUsed[0] && LaneUsed[1];
}
// TODO - we could support shuffling V2 in the Flipped input.
@@ -16569,6 +16652,11 @@ static SDValue lowerShuffleAsLanePermuteAndShuffle(
assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
"In-lane shuffle mask expected");
+ // If we're not using both lanes and the in-lane mask is not repeating,
+ // then we're better off splitting.
+ if (!AllLanes && !is128BitLaneRepeatedShuffleMask(VT, InLaneMask))
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+
// Flip the lanes, and shuffle the results which should now be in-lane.
MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
SDValue Flipped = DAG.getBitcast(PVT, V1);
@@ -22598,7 +22686,7 @@ SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
/// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
/// This mode isn't supported in hardware on X86. But as long as we aren't
/// compiling with trapping math, we can emulate this with
-/// floor(X + copysign(nextafter(0.5, 0.0), X)).
+/// trunc(X + copysign(nextafter(0.5, 0.0), X)).
static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
@@ -23157,10 +23245,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// For equality comparisons try to use SIGN_EXTEND if the input was
// truncate from something with enough sign bits.
if (Op0.getOpcode() == ISD::TRUNCATE) {
- if (DAG.ComputeMinSignedBits(Op0.getOperand(0)) <= 16)
+ if (DAG.ComputeMaxSignificantBits(Op0.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
} else if (Op1.getOpcode() == ISD::TRUNCATE) {
- if (DAG.ComputeMinSignedBits(Op1.getOperand(0)) <= 16)
+ if (DAG.ComputeMaxSignificantBits(Op1.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
}
}
@@ -24543,32 +24631,27 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
} else if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
- // Apply further optimizations for special cases
- // (select (x != 0), -1, 0) -> neg & sbb
- // (select (x == 0), 0, -1) -> neg & sbb
- if (isNullConstant(Y) &&
- (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
+ // 'X - 1' sets the carry flag if X == 0.
+ // '0 - X' sets the carry flag if X != 0.
+ // Convert the carry flag to a -1/0 mask with sbb:
+ // select (X != 0), -1, Y --> 0 - X; or (sbb), Y
+ // select (X == 0), Y, -1 --> 0 - X; or (sbb), Y
+ // select (X != 0), Y, -1 --> X - 1; or (sbb), Y
+ // select (X == 0), -1, Y --> X - 1; or (sbb), Y
+ SDValue Sub;
+ if (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE)) {
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0);
- Zero = DAG.getConstant(0, DL, Op.getValueType());
- return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Neg.getValue(1));
+ Sub = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0);
+ } else {
+ SDValue One = DAG.getConstant(1, DL, CmpOp0.getValueType());
+ Sub = DAG.getNode(X86ISD::SUB, DL, CmpVTs, CmpOp0, One);
}
-
- Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs,
- CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
-
- SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
- SDValue Res = // Res = 0 or -1.
- DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp.getValue(1));
-
- if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
- Res = DAG.getNOT(DL, Res, Res.getValueType());
-
- return DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ Sub.getValue(1));
+ return DAG.getNode(ISD::OR, DL, VT, SBB, Y);
} else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
Cmp.getOperand(0).getOpcode() == ISD::AND &&
isOneConstant(Cmp.getOperand(0).getOperand(1))) {
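A scalar model of the carry-flag trick described in the comments above (a sketch; the function names are illustrative): the subtraction sets the carry flag, SBB/SETCC_CARRY materializes that flag as an all-ones or all-zeros mask, and the OR completes the select.

    #include <cstdint>
    uint32_t selectNeAllOnes(uint32_t X, uint32_t Y) {
      // '0 - X' borrows (sets CF) exactly when X != 0.
      uint32_t Mask = (X != 0) ? UINT32_MAX : 0; // what sbb yields
      return Mask | Y;                           // -1 when X != 0, else Y
    }
    uint32_t selectEqAllOnes(uint32_t X, uint32_t Y) {
      // 'X - 1' borrows exactly when X == 0, giving the mirrored form.
      uint32_t Mask = (X == 0) ? UINT32_MAX : 0;
      return Mask | Y;                           // -1 when X == 0, else Y
    }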
@@ -25725,9 +25808,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
/// necessary casting or extending for \p Mask when lowering masking intrinsics
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
- SDValue PreservedSrc,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ SDValue PreservedSrc,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
unsigned OpcodeSelect = ISD::VSELECT;
@@ -29743,20 +29826,106 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
bool IsFSHR = Op.getOpcode() == ISD::FSHR;
if (VT.isVector()) {
- assert(Subtarget.hasVBMI2() && "Expected VBMI2");
+ APInt APIntShiftAmt;
+ bool IsCstSplat = X86::isConstantSplat(Amt, APIntShiftAmt);
- if (IsFSHR)
- std::swap(Op0, Op1);
+ if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
+ if (IsFSHR)
+ std::swap(Op0, Op1);
- APInt APIntShiftAmt;
- if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
- uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
- SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
- return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
- {Op0, Op1, Imm}, DAG, Subtarget);
+ if (IsCstSplat) {
+ uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
+ SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
+ return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
+ {Op0, Op1, Imm}, DAG, Subtarget);
+ }
+ return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ {Op0, Op1, Amt}, DAG, Subtarget);
}
- return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
- {Op0, Op1, Amt}, DAG, Subtarget);
+ assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
+ VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&
+ "Unexpected funnel shift type!");
+
+ // fshl(x,y,z) -> (unpack(y,x) << (z & (bw-1))) >> bw.
+ // fshr(x,y,z) -> unpack(y,x) >> (z & (bw-1)).
+ if (IsCstSplat)
+ return SDValue();
+
+ SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
+ SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
+ bool IsCst = ISD::isBuildVectorOfConstantSDNodes(AmtMod.getNode());
+
+ unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
+ MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2);
+
+ // Split 256-bit integers on XOP/pre-AVX2 targets.
+ // Split 512-bit integers on non 512-bit BWI targets.
+ if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 32) ||
+ !Subtarget.hasAVX2())) ||
+ (VT.is512BitVector() && !Subtarget.useBWIRegs() &&
+ EltSizeInBits < 32)) {
+ // Pre-mask the amount modulo using the wider vector.
+ Op = DAG.getNode(Op.getOpcode(), DL, VT, Op0, Op1, AmtMod);
+ return splitVectorOp(Op, DAG);
+ }
+
+ // Attempt to fold scalar shift as unpack(y,x) << zext(splat(z))
+ if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) {
+ if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) {
+ SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
+ SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
+ ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32);
+ Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt, Subtarget,
+ DAG);
+ Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt, Subtarget,
+ DAG);
+ return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);
+ }
+ }
+
+ MVT WideSVT = MVT::getIntegerVT(
+ std::min<unsigned>(EltSizeInBits * 2, Subtarget.hasBWI() ? 16 : 32));
+ MVT WideVT = MVT::getVectorVT(WideSVT, NumElts);
+
+ // If per-element shifts are legal, fall back to generic expansion.
+ if (supportedVectorVarShift(VT, Subtarget, ShiftOpc) || Subtarget.hasXOP())
+ return SDValue();
+
+ // Attempt to fold as:
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
+ if (supportedVectorVarShift(WideVT, Subtarget, ShiftOpc) &&
+ supportedVectorShiftWithImm(WideVT, Subtarget, ShiftOpc)) {
+ Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Op0);
+ Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op1);
+ AmtMod = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, AmtMod);
+ Op0 = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, WideVT, Op0,
+ EltSizeInBits, DAG);
+ SDValue Res = DAG.getNode(ISD::OR, DL, WideVT, Op0, Op1);
+ Res = DAG.getNode(ShiftOpc, DL, WideVT, Res, AmtMod);
+ if (!IsFSHR)
+ Res = getTargetVShiftByConstNode(X86ISD::VSRLI, DL, WideVT, Res,
+ EltSizeInBits, DAG);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
+ }
+
+ // Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z)
+ if ((IsCst && !IsFSHR && EltSizeInBits == 8) ||
+ supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
+ SDValue Z = DAG.getConstant(0, DL, VT);
+ SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
+ SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
+ SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
+ SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
+ SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
+ return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);
+ }
+
+ // Fallback to generic expansion.
+ return SDValue();
}
assert(
(VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
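A scalar model of the widened lowering used above when VBMI2 funnel-shift instructions are not available (a sketch for one i8 element; fshl8/fshr8 are illustrative names): the two inputs are concatenated into a double-width value, shifted by the masked amount, and the relevant half is taken.

    #include <cstdint>
    // fshl(x,y,z) per i8 element: (((x << 8) | y) << (z & 7)) >> 8.
    uint8_t fshl8(uint8_t X, uint8_t Y, uint8_t Z) {
      uint16_t Wide = (uint16_t(X) << 8) | Y;
      return uint8_t((Wide << (Z & 7)) >> 8);
    }
    // fshr(x,y,z) per i8 element: ((x << 8) | y) >> (z & 7).
    uint8_t fshr8(uint8_t X, uint8_t Y, uint8_t Z) {
      uint16_t Wide = (uint16_t(X) << 8) | Y;
      return uint8_t(Wide >> (Z & 7));
    }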
@@ -29901,8 +30070,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// Attempt to fold as unpack(x,x) << zext(splat(y)):
// rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
- // TODO: Handle vXi16 cases.
- if (EltSizeInBits == 8 || EltSizeInBits == 32) {
+ // TODO: Handle vXi16 cases on all targets.
+ if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
+ (IsROTL && EltSizeInBits == 16 && !Subtarget.hasAVX())) {
if (SDValue BaseRotAmt = DAG.getSplatValue(AmtMod)) {
unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
@@ -33013,7 +33183,7 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
// AVX512BW has shifts such as vpsllvw.
if (Subtarget.hasBWI() && Bits == 16)
- return false;
+ return false;
// Otherwise, it's significantly cheaper to shift by a scalar amount than by a
// fully general vector.
@@ -33029,6 +33199,11 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const {
case X86ISD::FMAX:
case X86ISD::FMIN:
case X86ISD::FANDN:
+ case X86ISD::VPSHA:
+ case X86ISD::VPSHL:
+ case X86ISD::VSHLV:
+ case X86ISD::VSRLV:
+ case X86ISD::VSRAV:
return true;
}
@@ -33285,9 +33460,7 @@ bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
MachineBasicBlock *BB) {
// Scan forward through BB for a use/def of EFLAGS.
- for (MachineBasicBlock::iterator miI = std::next(Itr), miE = BB->end();
- miI != miE; ++miI) {
- const MachineInstr& mi = *miI;
+ for (const MachineInstr &mi : llvm::make_range(std::next(Itr), BB->end())) {
if (mi.readsRegister(X86::EFLAGS))
return true;
// If we found a def, we can stop searching.
@@ -38724,6 +38897,8 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
case X86ISD::VBROADCAST:
case X86ISD::MOVDDUP:
case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
case X86ISD::VPERMI:
case X86ISD::VPERMILPI: {
if (N.getOperand(0).getValueType() == ShuffleVT &&
@@ -38877,9 +39052,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
return R;
- if (SDValue R = canonicalizeShuffleWithBinOps(N, DAG, DL))
- return R;
-
// Handle specific target shuffles.
switch (Opcode) {
case X86ISD::MOVDDUP: {
@@ -39844,6 +40016,12 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
DCI))
return SDValue(N, 0);
+
+ // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
+ // Perform this after other shuffle combines to allow inner shuffles to be
+ // combined away first.
+ if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, SDLoc(N)))
+ return BinOp;
}
return SDValue();
@@ -40037,6 +40215,24 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));
break;
}
+ case X86ISD::VPSHA:
+ case X86ISD::VPSHL:
+ case X86ISD::VSHLV:
+ case X86ISD::VSRLV:
+ case X86ISD::VSRAV: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+ KnownZero = LHSZero;
+ break;
+ }
case X86ISD::KSHIFTL: {
SDValue Src = Op.getOperand(0);
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
@@ -41799,6 +41995,37 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// (mul (zext a), (sext b))
+static bool detectExtMul(SelectionDAG &DAG, const SDValue &Mul, SDValue &Op0,
+ SDValue &Op1) {
+ Op0 = Mul.getOperand(0);
+ Op1 = Mul.getOperand(1);
+
+ // Canonicalize: Op1 should be the sign-extended operand.
+ if (Op0.getOpcode() == ISD::SIGN_EXTEND)
+ std::swap(Op0, Op1);
+
+ auto IsFreeTruncation = [](SDValue &Op) -> bool {
+ if ((Op.getOpcode() == ISD::ZERO_EXTEND ||
+ Op.getOpcode() == ISD::SIGN_EXTEND) &&
+ Op.getOperand(0).getScalarValueSizeInBits() <= 8)
+ return true;
+
+ auto *BV = dyn_cast<BuildVectorSDNode>(Op);
+ return (BV && BV->isConstant());
+ };
+
+ // (dpbusd (zext a), (sext b)). Since the first operand should be an unsigned
+ // value, check that Op0 is a zero-extended value. Op1 should be a signed
+ // value, so we just check its sign bits.
+ if ((IsFreeTruncation(Op0) &&
+ DAG.computeKnownBits(Op0).countMaxActiveBits() <= 8) &&
+ (IsFreeTruncation(Op1) && DAG.ComputeMaxSignificantBits(Op1) <= 8))
+ return true;
+
+ return false;
+}
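A scalar sketch (with assumed fixed-width types rather than DAG nodes) of the multiply shape detectExtMul accepts: one operand zero-extended from at most 8 bits, the other sign-extended from at most 8 bits.

    #include <cstdint>
    // (mul (zext i8 A), (sext i8 B)) in scalar form; the real check also
    // accepts constant build vectors whose values happen to fit.
    int32_t extMul(uint8_t A, int8_t B) {
      return int32_t(A) * int32_t(B);
    }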
+
// Given a ABS node, detect the following pattern:
// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))).
// This is useful as it is the input into a SAD pattern.
@@ -41820,6 +42047,50 @@ static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) {
return true;
}
+static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
+ unsigned &LogBias, const SDLoc &DL,
+ const X86Subtarget &Subtarget) {
+ // Extend or truncate to MVT::i8 first.
+ MVT Vi8VT =
+ MVT::getVectorVT(MVT::i8, LHS.getValueType().getVectorElementCount());
+ LHS = DAG.getZExtOrTrunc(LHS, DL, Vi8VT);
+ RHS = DAG.getSExtOrTrunc(RHS, DL, Vi8VT);
+
+ // VPDPBUSD(<16 x i32>C, <16 x i8>A, <16 x i8>B). For each dst element:
+ // C[0] = C[0] + A[0]*B[0] + A[1]*B[1] + A[2]*B[2] + A[3]*B[3].
+ // The src A, B element type is i8, but the dst C element type is i32, so
+ // each dst element already accumulates 4 src elements. When counting the
+ // reduction stages we use the src vector type vXi8, so a log-bias of 2
+ // avoids emitting 2 redundant stages.
+ LogBias = 2;
+
+ unsigned RegSize = std::max(128u, (unsigned)Vi8VT.getSizeInBits());
+ if (Subtarget.hasVNNI() && !Subtarget.hasVLX())
+ RegSize = std::max(512u, RegSize);
+
+ // "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
+ // fill in the missing vector elements with 0.
+ unsigned NumConcat = RegSize / Vi8VT.getSizeInBits();
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, Vi8VT));
+ Ops[0] = LHS;
+ MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
+ SDValue DpOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
+ Ops[0] = RHS;
+ SDValue DpOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
+
+ // Actually build the DotProduct, split as 256/512 bits for
+ // AVXVNNI/AVX512VNNI.
+ auto DpBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
+ return DAG.getNode(X86ISD::VPDPBUSD, DL, VT, Ops);
+ };
+ MVT DpVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ SDValue Zero = DAG.getConstant(0, DL, DpVT);
+
+ return SplitOpsAndApply(DAG, Subtarget, DL, DpVT, {Zero, DpOp0, DpOp1},
+ DpBuilder, false);
+}
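A scalar model (a sketch, not the intrinsic's formal definition) of what a single VPDPBUSD lane computes, matching the comment above: unsigned-i8 by signed-i8 products summed in groups of four and accumulated into a signed i32 lane.

    #include <cstdint>
    int32_t vpdpbusdLane(int32_t C, const uint8_t A[4], const int8_t B[4]) {
      for (int I = 0; I < 4; ++I)
        C += int32_t(A[I]) * int32_t(B[I]); // each product fits in 16 bits
      return C;
    }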
+
// Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the inputs
// to these zexts.
static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
@@ -41967,18 +42238,19 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
- // For all_of(setcc(vec,0,eq)) - avoid vXi64 comparisons if we don't have
- // PCMPEQQ (SSE41+), use PCMPEQD instead.
- if (BinOp == ISD::AND && !Subtarget.hasSSE41() &&
- Match.getOpcode() == ISD::SETCC &&
- ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
+ // For all_of(setcc(x,y,eq))
+ // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD.
+ // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()).
+ if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
ISD::CondCode::SETEQ) {
SDValue Vec = Match.getOperand(0);
- if (Vec.getValueType().getScalarType() == MVT::i64 &&
- (2 * NumElts) <= MaxElts) {
+ EVT VecSVT = Vec.getValueType().getScalarType();
+ if ((VecSVT == MVT::i16 && !Subtarget.hasBWI()) ||
+ (VecSVT == MVT::i64 && !Subtarget.hasSSE41())) {
NumElts *= 2;
- EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ VecSVT = VecSVT.getHalfSizedIntegerVT(*DAG.getContext());
+ EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumElts);
MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
Match = DAG.getSetCC(
DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
@@ -42069,6 +42341,77 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
}
+static SDValue combineVPDPBUSDPattern(SDNode *Extract, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasVNNI() && !Subtarget.hasAVXVNNI())
+ return SDValue();
+
+ EVT ExtractVT = Extract->getValueType(0);
+ // Verify the type we're extracting is i32, as the output element type of
+ // vpdpbusd is i32.
+ if (ExtractVT != MVT::i32)
+ return SDValue();
+
+ EVT VT = Extract->getOperand(0).getValueType();
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ return SDValue();
+
+ // Match shuffle + add pyramid.
+ ISD::NodeType BinOp;
+ SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});
+
+ // We can't combine to vpdpbusd for zext, because each of the 4 multiplies
+ // done by vpdpbusd computes a signed 16-bit product that will be sign-extended
+ // before being added into the accumulator.
+ // TODO:
+ // We also need to verify that the multiply has at least 2x the number of bits
+ // of the input. We shouldn't match
+ // (sign_extend (mul (vXi9 (zext (vXi8 X))), (vXi9 (zext (vXi8 Y))))).
+ // if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND))
+ // Root = Root.getOperand(0);
+
+ // If there was a match, we want Root to be a mul.
+ if (!Root || Root.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ // Check whether we have an extend-and-mul pattern.
+ SDValue LHS, RHS;
+ if (!detectExtMul(DAG, Root, LHS, RHS))
+ return SDValue();
+
+ // Create the dot product instruction.
+ SDLoc DL(Extract);
+ unsigned StageBias;
+ SDValue DP = createVPDPBUSD(DAG, LHS, RHS, StageBias, DL, Subtarget);
+
+ // If the original vector was wider than 4 elements, sum over the results
+ // in the DP vector.
+ unsigned Stages = Log2_32(VT.getVectorNumElements());
+ EVT DpVT = DP.getValueType();
+
+ if (Stages > StageBias) {
+ unsigned DpElems = DpVT.getVectorNumElements();
+
+ for (unsigned i = Stages - StageBias; i > 0; --i) {
+ SmallVector<int, 16> Mask(DpElems, -1);
+ for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
+ Mask[j] = MaskEnd + j;
+
+ SDValue Shuffle =
+ DAG.getVectorShuffle(DpVT, DL, DP, DAG.getUNDEF(DpVT), Mask);
+ DP = DAG.getNode(ISD::ADD, DL, DpVT, DP, Shuffle);
+ }
+ }
+
+ // Return the lowest ExtractSizeInBits bits.
+ EVT ResVT =
+ EVT::getVectorVT(*DAG.getContext(), ExtractVT,
+ DpVT.getSizeInBits() / ExtractVT.getSizeInBits());
+ DP = DAG.getBitcast(ResVT, DP);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, DP,
+ Extract->getOperand(1));
+}
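A scalar sketch (names and types assumed for illustration) of the shuffle+add pyramid emitted above: each step adds lane J + Half onto lane J, halving the number of live partial sums until lane 0 holds the total. This is also why only Stages - StageBias steps are needed once VPDPBUSD has already folded groups of four.

    #include <vector>
    int horizontalAdd(std::vector<int> V) { // V.size() must be a power of two
      for (size_t Half = V.size() / 2; Half >= 1; Half /= 2)
        for (size_t J = 0; J < Half; ++J)
          V[J] += V[J + Half]; // "shuffle the upper half down, then add"
      return V[0];
    }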
+
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// PSADBW is only supported on SSE2 and up.
@@ -42676,6 +43019,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
return SAD;
+ if (SDValue VPDPBUSD = combineVPDPBUSDPattern(N, DAG, Subtarget))
+ return VPDPBUSD;
+
// Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget))
return Cmp;
@@ -42903,6 +43249,15 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
// multiplier, convert to 'and' + 'add'.
const APInt &TrueVal = TrueC->getAPIntValue();
const APInt &FalseVal = FalseC->getAPIntValue();
+
+ // We have a more efficient lowering for "(X == 0) ? Y : -1" using SBB.
+ if ((TrueVal.isAllOnes() || FalseVal.isAllOnes()) &&
+ Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETEQ || CC == ISD::SETNE)
+ return SDValue();
+ }
+
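The early bail-out above defers to the SBB-based lowering of "(X == 0) ? Y : -1". As a scalar identity (a sketch of the branchless form, not the exact instruction sequence the backend emits):

    #include <cstdint>
    // When X != 0 the mask is all ones and the result is -1; when X == 0 the
    // mask is 0 and the result is Y.
    uint32_t selectOrAllOnes(uint32_t X, uint32_t Y) {
      uint32_t Mask = -uint32_t(X != 0);
      return Mask | Y;
    }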
bool OV;
APInt Diff = TrueVal.ssub_ov(FalseVal, OV);
if (OV)
@@ -44052,6 +44407,23 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
// TESTZ(X,-1) == TESTZ(X,X)
if (ISD::isBuildVectorAllOnes(Op1.getNode()))
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+
+ // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
+ // TODO: Add COND_NE handling?
+ if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX()) {
+ SDValue Src0 = peekThroughBitcasts(Op0);
+ SDValue Src1 = peekThroughBitcasts(Op1);
+ if (Src0.getOpcode() == ISD::OR && Src1.getOpcode() == ISD::OR) {
+ Src0 = getSplitVectorSrc(peekThroughBitcasts(Src0.getOperand(0)),
+ peekThroughBitcasts(Src0.getOperand(1)), true);
+ Src1 = getSplitVectorSrc(peekThroughBitcasts(Src1.getOperand(0)),
+ peekThroughBitcasts(Src1.getOperand(1)), true);
+ if (Src0 && Src1)
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+ DAG.getBitcast(MVT::v4i64, Src0),
+ DAG.getBitcast(MVT::v4i64, Src1));
+ }
+ }
}
return SDValue();
@@ -44117,21 +44489,58 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
BCNumEltBits > NumEltBits &&
DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) {
SDLoc DL(EFLAGS);
- unsigned CmpMask = IsAnyOf ? 0 : ((1 << BCNumElts) - 1);
+ APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : BCNumElts);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BC),
DAG.getConstant(CmpMask, DL, MVT::i32));
}
}
+ // MOVMSK(CONCAT(X,Y)) == 0 -> MOVMSK(OR(X,Y)).
+ // MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
+ // MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
+ // MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
+ if (VecVT.is256BitVector()) {
+ SmallVector<SDValue> Ops;
+ if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
+ Ops.size() == 2) {
+ SDLoc DL(EFLAGS);
+ EVT SubVT = Ops[0].getValueType();
+ APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2);
+ SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, Ops);
+ V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V);
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V),
+ DAG.getConstant(CmpMask, DL, MVT::i32));
+ }
+ }
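The CONCAT folds above rely on MOVMSK sampling only sign bits, so the mask of OR(X,Y) is the OR of the individual masks and the mask of AND(X,Y) is their AND. A sketch over two already-extracted half-width masks (16-bit widths assumed for illustration):

    #include <cstdint>
    // any-of: some sign bit set in either half.
    bool anyOfConcat(uint16_t Lo, uint16_t Hi) { return (Lo | Hi) != 0; }
    // all-of: every sign bit set in both halves.
    bool allOfConcat(uint16_t Lo, uint16_t Hi) { return (Lo & Hi) == 0xFFFF; }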
+
// MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
// MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
+ // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
+ // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
if (IsAllOf && Subtarget.hasSSE41()) {
+ MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
SDValue BC = peekThroughBitcasts(Vec);
- if (BC.getOpcode() == X86ISD::PCMPEQ &&
- ISD::isBuildVectorAllZeros(BC.getOperand(1).getNode())) {
- MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
- SDValue V = DAG.getBitcast(TestVT, BC.getOperand(0));
+ if (BC.getOpcode() == X86ISD::PCMPEQ) {
+ SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
+ BC.getOperand(0), BC.getOperand(1));
+ V = DAG.getBitcast(TestVT, V);
+ return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ }
+ // Check for 256-bit split vector cases.
+ if (BC.getOpcode() == ISD::AND &&
+ BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
+ BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
+ SDValue LHS = BC.getOperand(0);
+ SDValue RHS = BC.getOperand(1);
+ LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
+ LHS.getOperand(0), LHS.getOperand(1));
+ RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
+ RHS.getOperand(0), RHS.getOperand(1));
+ LHS = DAG.getBitcast(TestVT, LHS);
+ RHS = DAG.getBitcast(TestVT, RHS);
+ SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
}
}
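The new PCMPEQ(X,Y) cases rest on a simple identity: every lane of X equals the corresponding lane of Y exactly when the lane-wise difference X - Y is the zero vector, which is what PTEST(V,V)'s ZF reports. A scalar sketch (four 32-bit lanes assumed):

    #include <cstdint>
    // Both functions agree for all inputs: X[i] - Y[i] == 0 iff X[i] == Y[i],
    // including wrap-around, because the subtraction is modular.
    bool allLanesEqual(const uint32_t X[4], const uint32_t Y[4]) {
      for (int I = 0; I < 4; ++I)
        if (X[I] != Y[I])
          return false;
      return true;
    }
    bool differenceIsZero(const uint32_t X[4], const uint32_t Y[4]) {
      uint32_t Any = 0;
      for (int I = 0; I < 4; ++I)
        Any |= X[I] - Y[I]; // PTEST(V, V) sets ZF iff every lane of V is zero
      return Any == 0;
    }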
@@ -44162,23 +44571,28 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
// PMOVMSKB(PACKSSBW(LO(X), HI(X)))
// -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA.
if (CmpBits >= 16 && Subtarget.hasInt256() &&
- VecOp0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- VecOp1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- VecOp0.getOperand(0) == VecOp1.getOperand(0) &&
- VecOp0.getConstantOperandAPInt(1) == 0 &&
- VecOp1.getConstantOperandAPInt(1) == 8 &&
(IsAnyOf || (SignExt0 && SignExt1))) {
- SDLoc DL(EFLAGS);
- SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0));
- Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
- unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
- if (!SignExt0 || !SignExt1) {
- assert(IsAnyOf && "Only perform v16i16 signmasks for any_of patterns");
- Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
- DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+ if (SDValue Src = getSplitVectorSrc(VecOp0, VecOp1, true)) {
+ SDLoc DL(EFLAGS);
+ SDValue Result = peekThroughBitcasts(Src);
+ if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) {
+ SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
+ Result.getOperand(0), Result.getOperand(1));
+ V = DAG.getBitcast(MVT::v4i64, V);
+ return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+ }
+ Result = DAG.getBitcast(MVT::v32i8, Result);
+ Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+ unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
+ if (!SignExt0 || !SignExt1) {
+ assert(IsAnyOf &&
+ "Only perform v16i16 signmasks for any_of patterns");
+ Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+ DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+ }
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
+ DAG.getConstant(CmpMask, DL, MVT::i32));
}
- return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
- DAG.getConstant(CmpMask, DL, MVT::i32));
}
}
@@ -44732,7 +45146,8 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Sign bits must extend down to the lowest i16.
- if (DAG.ComputeMinSignedBits(N1) > 16 || DAG.ComputeMinSignedBits(N0) > 16)
+ if (DAG.ComputeMaxSignificantBits(N1) > 16 ||
+ DAG.ComputeMaxSignificantBits(N0) > 16)
return SDValue();
// At least one of the elements must be zero in the upper 17 bits, or can be
@@ -45224,33 +45639,28 @@ static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
// truncation trees that help us avoid lane crossing shuffles.
// TODO: There's a lot more we can do for PACK/HADD style shuffle combines.
// TODO: We don't handle vXf64 shuffles yet.
- if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32 &&
- BC0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- BC1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- BC0.getOperand(0) == BC1.getOperand(0) &&
- BC0.getOperand(0).getValueType().is256BitVector() &&
- BC0.getConstantOperandAPInt(1) == 0 &&
- BC1.getConstantOperandAPInt(1) ==
- BC0.getValueType().getVectorNumElements()) {
- SmallVector<SDValue> ShuffleOps;
- SmallVector<int> ShuffleMask, ScaledMask;
- SDValue Vec = peekThroughBitcasts(BC0.getOperand(0));
- if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) {
- resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask);
- // To keep the HOP LHS/RHS coherency, we must be able to scale the unary
- // shuffle to a v4X64 width - we can probably relax this in the future.
- if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
- ShuffleOps[0].getValueType().is256BitVector() &&
- scaleShuffleElements(ShuffleMask, 4, ScaledMask)) {
- SDValue Lo, Hi;
- MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
- std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL);
- Lo = DAG.getBitcast(SrcVT, Lo);
- Hi = DAG.getBitcast(SrcVT, Hi);
- SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
- Res = DAG.getBitcast(ShufVT, Res);
- Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask);
- return DAG.getBitcast(VT, Res);
+ if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {
+ if (SDValue BCSrc = getSplitVectorSrc(BC0, BC1, false)) {
+ SmallVector<SDValue> ShuffleOps;
+ SmallVector<int> ShuffleMask, ScaledMask;
+ SDValue Vec = peekThroughBitcasts(BCSrc);
+ if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) {
+ resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask);
+ // To keep the HOP LHS/RHS coherency, we must be able to scale the unary
+ // shuffle to a v4X64 width - we can probably relax this in the future.
+ if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
+ ShuffleOps[0].getValueType().is256BitVector() &&
+ scaleShuffleElements(ShuffleMask, 4, ScaledMask)) {
+ SDValue Lo, Hi;
+ MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
+ std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL);
+ Lo = DAG.getBitcast(SrcVT, Lo);
+ Hi = DAG.getBitcast(SrcVT, Hi);
+ SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
+ Res = DAG.getBitcast(ShufVT, Res);
+ Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask);
+ return DAG.getBitcast(VT, Res);
+ }
}
}
}
@@ -46047,6 +46457,49 @@ static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
}
+// Attempt to fold BITOP(SHIFT(X,Z),SHIFT(Y,Z)) -> SHIFT(BITOP(X,Y),Z).
+// NOTE: This is a very limited case of what SimplifyUsingDistributiveLaws
+// handles in InstCombine.
+static SDValue combineBitOpWithShift(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opc = N->getOpcode();
+ assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) &&
+ "Unexpected bit opcode");
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // Both operands must be single use.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ // Search for matching shifts.
+ SDValue BC0 = peekThroughOneUseBitcasts(N0);
+ SDValue BC1 = peekThroughOneUseBitcasts(N1);
+
+ unsigned BCOpc = BC0.getOpcode();
+ EVT BCVT = BC0.getValueType();
+ if (BCOpc != BC1->getOpcode() || BCVT != BC1.getValueType())
+ return SDValue();
+
+ switch (BCOpc) {
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI: {
+ if (BC0.getOperand(1) != BC1.getOperand(1))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue BitOp =
+ DAG.getNode(Opc, DL, BCVT, BC0.getOperand(0), BC1.getOperand(0));
+ SDValue Shift = DAG.getNode(BCOpc, DL, BCVT, BitOp, BC0.getOperand(1));
+ return DAG.getBitcast(VT, Shift);
+ }
+ }
+
+ return SDValue();
+}
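The fold is valid because both shifts fill the vacated positions with the same bits (zeros for logical shifts, matching sign bits for arithmetic ones), so the bit operation can be hoisted above a shared shift amount. A scalar sketch of the logical-shift-right case (32-bit width assumed, C < 32):

    #include <cstdint>
    // (X >> C) & (Y >> C) == (X & Y) >> C, and likewise for |, ^ and <<:
    // every result bit comes from the same source position in both operands
    // or is a zero fill bit in both.
    uint32_t andOfShifts(uint32_t X, uint32_t Y, unsigned C) {
      return (X >> C) & (Y >> C);
    }
    uint32_t shiftOfAnd(uint32_t X, uint32_t Y, unsigned C) {
      return (X & Y) >> C;
    }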
+
/// If this is a zero/all-bits result that is bitwise-anded with a low bits
/// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
/// with a shift-right to eliminate loading the vector constant mask value.
@@ -46350,6 +46803,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue R = combineBitOpWithShift(N, DAG))
+ return R;
+
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
@@ -46797,6 +47253,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue R = combineBitOpWithShift(N, DAG))
+ return R;
+
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
@@ -47837,7 +48296,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
- SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
+ SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
+ St->getValue().getOperand(0));
return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
MVT::v16i8, St->getMemOperand());
}
@@ -48630,7 +49090,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// originally concatenated from subvectors.
SmallVector<SDValue> ConcatOps;
if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
- return SDValue();
+ return SDValue();
}
unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
@@ -48714,7 +49174,7 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
// sequence or using AVX512 truncations. If the inputs are sext/zext then the
// truncations may actually be free by peeking through to the ext source.
auto IsSext = [&DAG](SDValue V) {
- return DAG.ComputeMinSignedBits(V) <= 16;
+ return DAG.ComputeMaxSignificantBits(V) <= 16;
};
auto IsZext = [&DAG](SDValue V) {
return DAG.computeKnownBits(V).countMaxActiveBits() <= 16;
@@ -49268,6 +49728,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue R = combineBitOpWithShift(N, DAG))
+ return R;
+
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
@@ -52185,6 +52648,22 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
unsigned NumOps = Ops.size();
switch (Op0.getOpcode()) {
+ case X86ISD::VBROADCAST: {
+ if (!IsSplat && VT == MVT::v4f64 && llvm::all_of(Ops, [](SDValue Op) {
+ return Op.getOperand(0).getValueType().is128BitVector();
+ }))
+ return DAG.getNode(X86ISD::MOVDDUP, DL, VT,
+ ConcatSubOperand(VT, Ops, 0));
+ break;
+ }
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP: {
+ if (!IsSplat)
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0));
+ break;
+ }
case X86ISD::SHUFP: {
// Add SHUFPD support if/when necessary.
if (!IsSplat && VT.getScalarType() == MVT::f32 &&
@@ -52207,14 +52686,21 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
LLVM_FALLTHROUGH;
case X86ISD::VPERMILPI:
- // TODO - add support for vXf64/vXi64 shuffles.
if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
- Subtarget.hasAVX() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
+ Op0.getOperand(1) == Ops[1].getOperand(1)) {
SDValue Res = DAG.getBitcast(MVT::v8f32, ConcatSubOperand(VT, Ops, 0));
Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
Op0.getOperand(1));
return DAG.getBitcast(VT, Res);
}
+ if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
+ uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
+ uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
+ uint64_t Idx = ((Idx1 & 3) << 2) | (Idx0 & 3);
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0),
+ DAG.getTargetConstant(Idx, DL, MVT::i8));
+ }
break;
case X86ISD::VPERMV3:
if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
@@ -52268,6 +52754,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
LLVM_FALLTHROUGH;
case X86ISD::VSRAI:
+ case X86ISD::VSHL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRA:
if (((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs() &&
(EltSizeInBits >= 32 || Subtarget.useBWIRegs()))) &&
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d1d6e319f16b..3f6d567d3f4d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1540,7 +1540,7 @@ namespace llvm {
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG &DAG) const;
- unsigned getAddressSpace(void) const;
+ unsigned getAddressSpace() const;
SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
SDValue &Chain) const;
diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 6642f46e64b2..7e751a4c8811 100644
--- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -95,14 +95,45 @@ static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
return Attrs.hasFnAttr(Attribute::ReturnsTwice);
}
+// Checks whether a function should have an ENDBR in its prologue.
+static bool needsPrologueENDBR(MachineFunction &MF, const Module *M) {
+ Function &F = MF.getFunction();
+
+ if (F.doesNoCfCheck())
+ return false;
+
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+ Metadata *IBTSeal = M->getModuleFlag("ibt-seal");
+
+ switch (TM->getCodeModel()) {
+ // Large code model functions are always reachable through indirect calls.
+ case CodeModel::Large:
+ return true;
+ // Only address-taken functions in an LTO'ed kernel are reachable indirectly.
+ // IBTSeal implies LTO, so only check whether the function is address-taken.
+ case CodeModel::Kernel:
+ // Check if ibt-seal was enabled (implies LTO is being used).
+ if (IBTSeal) {
+ return F.hasAddressTaken();
+ }
+ // If !IBTSeal, fall through to the default case.
+ LLVM_FALLTHROUGH;
+ // Address-taken or externally linked functions may be reachable.
+ default:
+ return (F.hasAddressTaken() || !F.hasLocalLinkage());
+ }
+}
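A small C++ sketch (hypothetical functions, default code model) of the reachability rule the default case encodes: only functions that are address-taken or externally visible can become indirect-call targets.

    namespace {
    void neverIndirect() {}             // internal linkage, address never taken: no ENDBR needed
    void calledViaPointer() {}          // internal linkage but address-taken below: needs ENDBR
    } // namespace
    void exportedEntry() {}             // external linkage: may be reached indirectly, needs ENDBR
    void (*FnPtr)() = calledViaPointer; // the address-taken use
    int main() {
      FnPtr();
      neverIndirect();
      exportedEntry();
      return 0;
    }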
+
bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &SubTarget = MF.getSubtarget<X86Subtarget>();
+ const Module *M = MF.getMMI().getModule();
// Check that the cf-protection-branch is enabled.
- Metadata *isCFProtectionSupported =
- MF.getMMI().getModule()->getModuleFlag("cf-protection-branch");
- // NB: We need to enable IBT in jitted code if JIT compiler is CET
- // enabled.
+ Metadata *isCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
+
+ // NB: We need to enable IBT in jitted code if JIT compiler is CET
+ // enabled.
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
#ifdef __CET__
@@ -119,13 +150,8 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
TII = SubTarget.getInstrInfo();
EndbrOpcode = SubTarget.is64Bit() ? X86::ENDBR64 : X86::ENDBR32;
- // Large code model, non-internal function or function whose address
- // was taken, can be accessed through indirect calls. Mark the first
- // BB with ENDBR instruction unless nocf_check attribute is used.
- if ((TM->getCodeModel() == CodeModel::Large ||
- MF.getFunction().hasAddressTaken() ||
- !MF.getFunction().hasLocalLinkage()) &&
- !MF.getFunction().doesNoCfCheck()) {
+ // If the function is reachable indirectly, mark the first BB with ENDBR.
+ if (needsPrologueENDBR(MF, M)) {
auto MBB = MF.begin();
Changed |= addENDBR(*MBB, MBB->begin());
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index ecd4777c3533..bc67d1f89d7f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10537,13 +10537,12 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
-// TODO - Replace WriteMove with WriteVecTrunc?
let Predicates = [prd] in
- defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteMove>, EVEX_V512;
+ defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteMove>, EVEX_V256;
- defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteMove>, EVEX_V128;
+ defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
+ defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index c379aa8d9258..4dcd886fa3b2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4088,8 +4088,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
Register SrcReg, Register SrcReg2,
int64_t ImmMask, int64_t ImmValue,
- const MachineInstr &OI,
- bool *IsSwapped) const {
+ const MachineInstr &OI, bool *IsSwapped,
+ int64_t *ImmDelta) const {
switch (OI.getOpcode()) {
case X86::CMP64rr:
case X86::CMP32rr:
@@ -4140,10 +4140,21 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
int64_t OIMask;
int64_t OIValue;
if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
- SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) {
- assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister &&
- "should not have 2nd register");
- return true;
+ SrcReg == OISrcReg && ImmMask == OIMask) {
+ if (OIValue == ImmValue) {
+ *ImmDelta = 0;
+ return true;
+ } else if (static_cast<uint64_t>(ImmValue) ==
+ static_cast<uint64_t>(OIValue) - 1) {
+ *ImmDelta = -1;
+ return true;
+ } else if (static_cast<uint64_t>(ImmValue) ==
+ static_cast<uint64_t>(OIValue) + 1) {
+ *ImmDelta = 1;
+ return true;
+ } else {
+ return false;
+ }
}
}
return FlagI.isIdenticalTo(OI);
@@ -4393,6 +4404,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
bool ShouldUpdateCC = false;
bool IsSwapped = false;
X86::CondCode NewCC = X86::COND_INVALID;
+ int64_t ImmDelta = 0;
// Search backward from CmpInstr for the next instruction defining EFLAGS.
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -4439,7 +4451,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// ... // EFLAGS not changed
// cmp x, y // <-- can be removed
if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
- Inst, &IsSwapped)) {
+ Inst, &IsSwapped, &ImmDelta)) {
Sub = &Inst;
break;
}
@@ -4473,7 +4485,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
// If we are done with the basic block, we need to check whether EFLAGS is
// live-out.
- bool IsSafe = false;
+ bool FlagsMayLiveOut = true;
SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
MachineBasicBlock::iterator AfterCmpInstr =
std::next(MachineBasicBlock::iterator(CmpInstr));
@@ -4483,7 +4495,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// We should check the usage if this instruction uses and updates EFLAGS.
if (!UseEFLAGS && ModifyEFLAGS) {
// It is safe to remove CmpInstr if EFLAGS is updated again.
- IsSafe = true;
+ FlagsMayLiveOut = false;
break;
}
if (!UseEFLAGS && !ModifyEFLAGS)
@@ -4491,7 +4503,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// EFLAGS is used by this instruction.
X86::CondCode OldCC = X86::COND_INVALID;
- if (MI || IsSwapped) {
+ if (MI || IsSwapped || ImmDelta != 0) {
// We decode the condition code from opcode.
if (Instr.isBranch())
OldCC = X86::getCondFromBranch(Instr);
@@ -4545,9 +4557,59 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
ReplacementCC = getSwappedCondition(OldCC);
if (ReplacementCC == X86::COND_INVALID)
return false;
+ ShouldUpdateCC = true;
+ } else if (ImmDelta != 0) {
+ unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
+ // The two compare immediates differ by one, so try to rewrite the condition
+ // code instead, taking care not to step past the signed/unsigned min/max
+ // constant for the 8/16/32/64-bit register width.
+ switch (OldCC) {
+ case X86::COND_L: // x <s (C + 1) --> x <=s C
+ if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_LE;
+ break;
+ case X86::COND_B: // x <u (C + 1) --> x <=u C
+ if (ImmDelta != 1 || CmpValue == 0)
+ return false;
+ ReplacementCC = X86::COND_BE;
+ break;
+ case X86::COND_GE: // x >=s (C + 1) --> x >s C
+ if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_G;
+ break;
+ case X86::COND_AE: // x >=u (C + 1) --> x >u C
+ if (ImmDelta != 1 || CmpValue == 0)
+ return false;
+ ReplacementCC = X86::COND_A;
+ break;
+ case X86::COND_G: // x >s (C - 1) --> x >=s C
+ if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_GE;
+ break;
+ case X86::COND_A: // x >u (C - 1) --> x >=u C
+ if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_AE;
+ break;
+ case X86::COND_LE: // x <=s (C - 1) --> x <s C
+ if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_L;
+ break;
+ case X86::COND_BE: // x <=u (C - 1) --> x <u C
+ if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_B;
+ break;
+ default:
+ return false;
+ }
+ ShouldUpdateCC = true;
}
- if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
+ if (ShouldUpdateCC && ReplacementCC != OldCC) {
// Push the MachineInstr to OpsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// instructions will be modified.
@@ -4555,14 +4617,14 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
}
if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
// It is safe to remove CmpInstr if EFLAGS is updated again or killed.
- IsSafe = true;
+ FlagsMayLiveOut = false;
break;
}
}
- // If EFLAGS is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if ((MI || IsSwapped) && !IsSafe) {
+ // If we have to update users but EFLAGS is live-out abort, since we cannot
+ // easily find all of the users.
+ if ((MI != nullptr || ShouldUpdateCC) && FlagsMayLiveOut) {
for (MachineBasicBlock *Successor : CmpMBB.successors())
if (Successor->isLiveIn(X86::EFLAGS))
return false;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 537ada6222bf..33ce55bbdb2b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -643,7 +643,8 @@ private:
/// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true
bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
- const MachineInstr &OI, bool *IsSwapped) const;
+ const MachineInstr &OI, bool *IsSwapped,
+ int64_t *ImmDelta) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 8abbaa92c8cf..28d57ca9ae3c 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -153,8 +153,8 @@ private:
X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
const X86Subtarget &STI,
const X86RegisterBankInfo &RBI)
- : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI),
+ : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
+ RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 6967a96ce83b..d0562214a025 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -610,7 +610,7 @@ MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
auto replaceOldReg = [OldReg, NewReg](const MachineOperand &Op) {
if (Op.isReg() && Op.getReg() == OldReg)
return MachineOperand::CreateReg(NewReg, false, false, false, false,
- false, false, false, false, 0,
+ false, false, false, false, false,
/*IsRenamable*/ true);
return Op;
};
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 47ae517ae76d..e92b1b002bb0 100644
--- a/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -129,10 +129,9 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
// Pad the identified basic blocks with NOOPs
- for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
- I != ReturnBBs.end(); ++I) {
- MachineBasicBlock *MBB = I->first;
- unsigned Cycles = I->second;
+ for (const auto &ReturnBB : ReturnBBs) {
+ MachineBasicBlock *MBB = ReturnBB.first;
+ unsigned Cycles = ReturnBB.second;
// Function::hasOptSize is already checked above.
bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
index babd923e7496..4342ac089cae 100644
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -13,15 +13,16 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
+#include "X86TargetMachine.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsX86.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
-#include "X86TargetMachine.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -49,7 +50,7 @@ public:
}
private:
- bool tryMAddReplacement(Instruction *Op);
+ bool tryMAddReplacement(Instruction *Op, bool ReduceInOneBB);
bool trySADReplacement(Instruction *Op);
};
}
@@ -63,7 +64,43 @@ char X86PartialReduction::ID = 0;
INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE,
"X86 Partial Reduction", false, false)
-bool X86PartialReduction::tryMAddReplacement(Instruction *Op) {
+// This function should be kept in sync with detectExtMul() in X86ISelLowering.cpp.
+static bool matchVPDPBUSDPattern(const X86Subtarget *ST, BinaryOperator *Mul,
+ const DataLayout *DL) {
+ if (!ST->hasVNNI() && !ST->hasAVXVNNI())
+ return false;
+
+ Value *LHS = Mul->getOperand(0);
+ Value *RHS = Mul->getOperand(1);
+
+ if (isa<SExtInst>(LHS))
+ std::swap(LHS, RHS);
+
+ auto IsFreeTruncation = [&](Value *Op) {
+ if (auto *Cast = dyn_cast<CastInst>(Op)) {
+ if (Cast->getParent() == Mul->getParent() &&
+ (Cast->getOpcode() == Instruction::SExt ||
+ Cast->getOpcode() == Instruction::ZExt) &&
+ Cast->getOperand(0)->getType()->getScalarSizeInBits() <= 8)
+ return true;
+ }
+
+ return isa<Constant>(Op);
+ };
+
+ // (dpbusd (zext a), (sext b)). Since the first operand must be an unsigned
+ // value, check that LHS is a zero-extended value. RHS must be a signed
+ // value, so just check its sign bits.
+ if ((IsFreeTruncation(LHS) &&
+ computeKnownBits(LHS, *DL).countMaxActiveBits() <= 8) &&
+ (IsFreeTruncation(RHS) && ComputeMaxSignificantBits(RHS, *DL) <= 8))
+ return true;
+
+ return false;
+}
+
+bool X86PartialReduction::tryMAddReplacement(Instruction *Op,
+ bool ReduceInOneBB) {
if (!ST->hasSSE2())
return false;
@@ -82,6 +119,13 @@ bool X86PartialReduction::tryMAddReplacement(Instruction *Op) {
Value *LHS = Mul->getOperand(0);
Value *RHS = Mul->getOperand(1);
+ // If the target supports VNNI, leave it to ISel to combine the reduce
+ // operation into a VNNI instruction.
+ // TODO: we can support transforming the reduce into a VNNI intrinsic across
+ // blocks in this pass.
+ if (ReduceInOneBB && matchVPDPBUSDPattern(ST, Mul, DL))
+ return false;
+
// LHS and RHS should be only used once or if they are the same then only
// used twice. Only check this when SSE4.1 is enabled and we have zext/sext
// instructions, otherwise we use punpck to emulate zero extend in stages. The
@@ -300,7 +344,9 @@ bool X86PartialReduction::trySADReplacement(Instruction *Op) {
// Walk backwards from the ExtractElementInst and determine if it is the end of
// a horizontal reduction. Return the input to the reduction if we find one.
-static Value *matchAddReduction(const ExtractElementInst &EE) {
+static Value *matchAddReduction(const ExtractElementInst &EE,
+ bool &ReduceInOneBB) {
+ ReduceInOneBB = true;
// Make sure we're extracting index 0.
auto *Index = dyn_cast<ConstantInt>(EE.getIndexOperand());
if (!Index || !Index->isNullValue())
@@ -309,6 +355,8 @@ static Value *matchAddReduction(const ExtractElementInst &EE) {
const auto *BO = dyn_cast<BinaryOperator>(EE.getVectorOperand());
if (!BO || BO->getOpcode() != Instruction::Add || !BO->hasOneUse())
return nullptr;
+ if (EE.getParent() != BO->getParent())
+ ReduceInOneBB = false;
unsigned NumElems = cast<FixedVectorType>(BO->getType())->getNumElements();
// Ensure the reduction size is a power of 2.
@@ -321,6 +369,8 @@ static Value *matchAddReduction(const ExtractElementInst &EE) {
const auto *BO = dyn_cast<BinaryOperator>(Op);
if (!BO || BO->getOpcode() != Instruction::Add)
return nullptr;
+ if (EE.getParent() != BO->getParent())
+ ReduceInOneBB = false;
// If this isn't the first add, then it should only have 2 users, the
// shuffle and another add which we checked in the previous iteration.
@@ -460,9 +510,10 @@ bool X86PartialReduction::runOnFunction(Function &F) {
if (!EE)
continue;
+ bool ReduceInOneBB;
// First find a reduction tree.
// FIXME: Do we need to handle other opcodes than Add?
- Value *Root = matchAddReduction(*EE);
+ Value *Root = matchAddReduction(*EE, ReduceInOneBB);
if (!Root)
continue;
@@ -470,7 +521,7 @@ bool X86PartialReduction::runOnFunction(Function &F) {
collectLeaves(Root, Leaves);
for (Instruction *I : Leaves) {
- if (tryMAddReplacement(I)) {
+ if (tryMAddReplacement(I, ReduceInOneBB)) {
MadeChange = true;
continue;
}
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index a6ff472aac6f..8e317dc22bd6 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -255,6 +255,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
@@ -418,6 +419,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15],
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [BWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [BWPort5], 1, [1], 1>;
@@ -1741,4 +1743,40 @@ def BWSETA_SETBErm : SchedWriteVariant<[
def : InstRW<[BWSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[BWSETA_SETBErm], (instrs SETCCm)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+
+ // int variants.
+ PXORrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr,
+
+ // xmm int variants.
+ VPXORrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VPXORYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 371a9571ae39..1cd0b3379684 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -257,6 +257,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [HWPort5], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>;
defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>;
@@ -416,6 +417,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15],
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [HWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [HWPort5], 1, [1], 1>;
@@ -2030,4 +2032,40 @@ def HWSETA_SETBErm : SchedWriteVariant<[
def : InstRW<[HWSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[HWSETA_SETBErm], (instrs SETCCm)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+
+ // int variants.
+ PXORrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr,
+
+ // xmm int variants.
+ VPXORrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VPXORYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 789de9eb5751..9fd986e34181 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -252,6 +252,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveZ, [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ICXPort05,ICXPort0156], 10, [9,1], 10>;
defm : ICXWriteResPair<WriteFAdd, [ICXPort01], 4, [1], 1, 5>; // Floating point add/sub.
@@ -367,6 +368,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveZ, [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [ICXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5], 1, [1], 1>;
@@ -2630,4 +2632,48 @@ def ICXSETA_SETBErm : SchedWriteVariant<[
def : InstRW<[ICXSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[ICXSETA_SETBErm], (instrs SETCCm)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+
+ // int variants.
+ PXORrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr,
+
+ // xmm int variants.
+ VPXORrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VPXORYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
+
+ // zmm variants.
+ VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
+ VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
+ VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
+ VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
+ VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
+ VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index af5c0540deb5..7e619a3a8722 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -223,6 +223,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1]
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveZ, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;
defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>;
@@ -380,6 +381,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveZ, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SBPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>;
@@ -1230,4 +1232,35 @@ def SBSETA_SETBErm : SchedWriteVariant<[
def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+
+ // int variants.
+ PXORrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr,
+
+ // xmm int variants.
+ VPXORrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index b3c13c72dd01..0a88bac5aa66 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -244,6 +244,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKLPort015], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>;
defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub.
@@ -359,6 +360,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKLPort015], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKLPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKLPort5], 1, [1], 1>;
@@ -1901,4 +1903,40 @@ def SKLSETA_SETBErm : SchedWriteVariant<[
def : InstRW<[SKLSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SKLSETA_SETBErm], (instrs SETCCm)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+
+ // int variants.
+ PXORrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr,
+
+ // xmm int variants.
+ VPXORrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VPXORYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 74f9da158353..b28a18f0dcd7 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -244,6 +244,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>;
defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub.
@@ -359,6 +360,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveZ, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>;
@@ -2613,4 +2615,48 @@ def SKXSETA_SETBErm : SchedWriteVariant<[
def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>;
def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+
+ // int variants.
+ PXORrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX Zero-idioms.
+ DepBreakingClass<[
+ // xmm fp variants.
+ VXORPSrr, VXORPDrr,
+
+ // xmm int variants.
+ VPXORrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+
+ // ymm variants.
+ VXORPSYrr, VXORPDYrr, VPXORYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
+
+ // zmm variants.
+ VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr,
+ VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
+ VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
+ VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
+ VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
+ VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 1cb48175260a..d57e14715a4e 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -239,6 +239,7 @@ def WriteFMaskedStore64Y : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
+def WriteFMoveZ : SchedWrite;
defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
@@ -354,6 +355,7 @@ def WriteVecMaskedStore64Y : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
+def WriteVecMoveZ : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
@@ -516,9 +518,11 @@ def WriteFMoveLSX
: X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
+def WriteFMoveLSZ
+ : X86SchedWriteMoveLS<WriteFMoveZ, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
- WriteFMoveLSY, WriteFMoveLSY>;
+ WriteFMoveLSY, WriteFMoveLSZ>;
def WriteFMoveLSNT
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
@@ -536,9 +540,11 @@ def WriteVecMoveLSX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
+def WriteVecMoveLSZ
+ : X86SchedWriteMoveLS<WriteVecMoveZ, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
- WriteVecMoveLSY, WriteVecMoveLSY>;
+ WriteVecMoveLSY, WriteVecMoveLSZ>;
def WriteVecMoveLSNT
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 0fedfc01092c..8ae8e574f87a 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -229,6 +229,7 @@ defm : X86WriteResUnsupported<WriteFMaskedStore64Y>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteFMoveY>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
@@ -382,6 +383,7 @@ defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteVecMoveY>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index 0f6f24f9f1fe..cb75c3660728 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -772,6 +772,7 @@ defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2
defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
@@ -1107,6 +1108,7 @@ defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index a070da34cab5..4b2fa87a25b5 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -525,6 +525,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveY, [JFPU01, JFPX], 1, [2, 2], 2>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;
@@ -682,6 +683,7 @@ defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveY, [JFPU01, JVALU], 1, [2, 2], 2>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 36e5b55a4194..52605c031617 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -200,6 +200,7 @@ def : WriteRes<WriteFMaskedStore64Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
@@ -345,6 +346,7 @@ def : WriteRes<WriteVecMaskedStore64Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>;
def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;
@@ -480,4 +482,22 @@ def: InstRW<[SLMWriteResGroup1rm], (instrs MMX_PADDQrm, PADDQrm,
MMX_PSUBQrm, PSUBQrm,
PCMPEQQrm)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ XOR32rr ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+
+ // int variants.
+ PXORrr,
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 4343e1ed45d1..fe0484afd227 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -286,6 +286,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
@@ -404,6 +405,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
@@ -1541,4 +1543,83 @@ def : InstRW<[WriteMicrocoded], (instrs VZEROUPPER)>;
// VZEROALL.
def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[
+ SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr
+ ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
+ MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
+ MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
+ MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX XMM Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX YMM Zero-idioms.
+ DepBreakingClass<[
+ // fp variants
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr,
+
+ // int variants
+ VPXORYrr, VPANDNYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX XMM
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX YMM
+ DepBreakingClass<[
+ VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
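
Besides the zero idioms, Zen also gets an IsDepBreakingFunction list: instructions such as PCMPEQDrr with identical sources (all-ones result) or CMP reg,reg (restricted by CheckSameRegOperand<0, 1>) produce a result that does not depend on the previous register contents even though it is not zero. A hedged sketch of the companion MCInstrAnalysis query (helper name is illustrative):

    #include "llvm/ADT/APInt.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstrAnalysis.h"

    using namespace llvm;

    // Sketch only: a simulator can drop the read-after-write dependencies on
    // the operands selected by Mask when MI is dependency breaking on this CPU.
    static bool ignoresOldRegisterValues(const MCInstrAnalysis &MCIA,
                                         const MCInst &MI, unsigned CPUID) {
      APInt Mask;
      return MCIA.isDependencyBreaking(MI, Mask, CPUID);
    }
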
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 96d2837880c7..38908a987595 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -274,6 +274,7 @@ defm : X86WriteRes<WriteFStoreNTY, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteFMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [Zn2FPU], 1, [1], 1>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : Zn2WriteResFpuPair<WriteFAdd, [Zn2FPU0], 3>;
defm : Zn2WriteResFpuPair<WriteFAddX, [Zn2FPU0], 3>;
@@ -388,6 +389,7 @@ defm : X86WriteRes<WriteVecMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [Zn2FPU], 2, [1], 2>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
defm : X86WriteRes<WriteVecMoveToGpr, [Zn2FPU2], 2, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [Zn2FPU2], 3, [1], 1>;
defm : X86WriteRes<WriteEMMS, [Zn2FPU], 2, [1], 1>;
@@ -1530,4 +1532,83 @@ def : InstRW<[WriteALU], (instrs VZEROUPPER)>;
// VZEROALL.
def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[
+ SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr
+ ], ZeroIdiomPredicate>,
+
+ // MMX Zero-idioms.
+ DepBreakingClass<[
+ MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
+ MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
+ MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
+ MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // SSE Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr, PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX XMM Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
+
+ // int variants.
+ VPXORrr, VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX YMM Zero-idioms.
+ DepBreakingClass<[
+ // fp variants
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr,
+
+ // int variants
+ VPXORYrr, VPANDNYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
+ ], ZeroIdiomPredicate>
+]>;
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
+
+ // MMX
+ DepBreakingClass<[
+ MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
+ ], ZeroIdiomPredicate>,
+
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX XMM
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX YMM
+ DepBreakingClass<[
+ VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
+ ], ZeroIdiomPredicate>,
+]>;
+
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index f4e03ac11f0b..02f7f8376fdb 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -1446,10 +1446,12 @@ defm : Zn3WriteResInt<WriteXCHG, [Zn3ALU0123], 0, [8], 2>; // Compare+Exc
defm : Zn3WriteResXMM<WriteFMove, [Zn3FPVMisc0123], 1, [1], 1>; // Empty sched class
defm : Zn3WriteResXMM<WriteFMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteFMoveY, [], 0, [], 1>;
+defm : X86WriteResUnsupported<WriteFMoveZ>;
defm : Zn3WriteResXMM<WriteVecMove, [Zn3FPFMisc0123], 1, [1], 1>; // MMX
defm : Zn3WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
defm : Zn3WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
+defm : X86WriteResUnsupported<WriteVecMoveZ>;
def : IsOptimizableRegisterMove<[
InstructionEquivalenceClass<[
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 83a4a025f518..dba11e8b4000 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -1139,7 +1139,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
// branch back to itself. We can do this here because at this point, every
// predecessor of this block has an available value. This is basically just
// automating the construction of a PHI node for this target.
- unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
+ Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
// Insert a comparison of the incoming target register with this block's
// address. This also requires us to mark the block as having its address
@@ -1642,7 +1642,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
return;
// Compute the current predicate state.
- unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
+ Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
auto InsertPt = MI.getIterator();
@@ -1913,7 +1913,7 @@ unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
auto *RC = MRI->getRegClass(Reg);
int Bytes = TRI->getRegSizeInBits(*RC) / 8;
- unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
+ Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
"Unknown register size");
@@ -2078,7 +2078,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
// First, we transfer the predicate state into the called function by merging
// it into the stack pointer. This will kill the current def of the state.
- unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
+ Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
// If this call is also a return, it is a tail call and we don't need anything
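
The unsigned-to-Register changes in this file are type tightening only: llvm::Register wraps the same value, converts implicitly where an unsigned is still expected, and makes the virtual/physical distinction explicit. A standalone illustration (not code from this file):

    #include "llvm/CodeGen/Register.h"

    using namespace llvm;

    void classify(Register Reg) {
      if (Reg.isVirtual()) {
        // Virtual registers live in a separate numbering space; no raw
        // arithmetic against physical register enums is needed (or safe).
      } else if (Reg.isPhysical()) {
        // Physical registers compare directly against target enums, e.g. X86::RSP.
      }
      unsigned Raw = Reg; // implicit conversion keeps older APIs working
      (void)Raw;
    }
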
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 78bc5519c23f..e3d0128dd73d 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -127,7 +127,7 @@ static std::string computeDataLayout(const Triple &TT) {
// Some ABIs align long double to 128 bits, others to 32.
if (TT.isOSNaCl() || TT.isOSIAMCU())
; // No f80
- else if (TT.isArch64Bit() || TT.isOSDarwin())
+ else if (TT.isArch64Bit() || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment())
Ret += "-f80:128";
else
Ret += "-f80:32";
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d8cd7311a0d5..5b95c10332dc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
@@ -3429,6 +3430,20 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+ static const CostTblEntry AVX512BWCostTbl[] = {
+ { ISD::ROTL, MVT::v32i16, 2 },
+ { ISD::ROTL, MVT::v16i16, 2 },
+ { ISD::ROTL, MVT::v8i16, 2 },
+ { ISD::ROTL, MVT::v64i8, 5 },
+ { ISD::ROTL, MVT::v32i8, 5 },
+ { ISD::ROTL, MVT::v16i8, 5 },
+ { ISD::ROTR, MVT::v32i16, 2 },
+ { ISD::ROTR, MVT::v16i16, 2 },
+ { ISD::ROTR, MVT::v8i16, 2 },
+ { ISD::ROTR, MVT::v64i8, 5 },
+ { ISD::ROTR, MVT::v32i8, 5 },
+ { ISD::ROTR, MVT::v16i8, 5 }
+ };
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::ROTL, MVT::v8i64, 1 },
{ ISD::ROTL, MVT::v4i64, 1 },
@@ -3506,6 +3521,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
MVT MTy = LT.second;
// Attempt to lookup cost.
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
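
The AVX512BW table models vector rotates on 8- and 16-bit elements as cheap once BWI is available (2 per legalized vector for vXi16, 5 for vXi8). Rotates reach this intrinsic cost path as funnel shifts whose two value operands are the same register, i.e. rotl(x, s) == fshl(x, x, s); a small scalar sketch of that identity (plain C++, not LLVM code):

    #include <cstdint>

    // fshl concatenates x:x, shifts left by s (mod width) and keeps the high
    // half, which is exactly a rotate left.
    constexpr uint16_t rotl16(uint16_t X, unsigned S) {
      S %= 16;
      return static_cast<uint16_t>((X << S) | (X >> ((16 - S) % 16)));
    }

    static_assert(rotl16(0x8001, 1) == 0x0003, "top bit wraps around");
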
@@ -4976,9 +4995,13 @@ InstructionCost X86TTIImpl::getGatherScatterOpCost(
const Instruction *I = nullptr) {
if (CostKind != TTI::TCK_RecipThroughput) {
if ((Opcode == Instruction::Load &&
- isLegalMaskedGather(SrcVTy, Align(Alignment))) ||
+ isLegalMaskedGather(SrcVTy, Align(Alignment)) &&
+ !forceScalarizeMaskedGather(cast<VectorType>(SrcVTy),
+ Align(Alignment))) ||
(Opcode == Instruction::Store &&
- isLegalMaskedScatter(SrcVTy, Align(Alignment))))
+ isLegalMaskedScatter(SrcVTy, Align(Alignment)) &&
+ !forceScalarizeMaskedScatter(cast<VectorType>(SrcVTy),
+ Align(Alignment))))
return 1;
return BaseT::getGatherScatterOpCost(Opcode, SrcVTy, Ptr, VariableMask,
Alignment, CostKind, I);
@@ -4993,9 +5016,13 @@ InstructionCost X86TTIImpl::getGatherScatterOpCost(
unsigned AddressSpace = PtrTy->getAddressSpace();
if ((Opcode == Instruction::Load &&
- !isLegalMaskedGather(SrcVTy, Align(Alignment))) ||
+ (!isLegalMaskedGather(SrcVTy, Align(Alignment)) ||
+ forceScalarizeMaskedGather(cast<VectorType>(SrcVTy),
+ Align(Alignment)))) ||
(Opcode == Instruction::Store &&
- !isLegalMaskedScatter(SrcVTy, Align(Alignment))))
+ (!isLegalMaskedScatter(SrcVTy, Align(Alignment)) ||
+ forceScalarizeMaskedScatter(cast<VectorType>(SrcVTy),
+ Align(Alignment)))))
return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment,
AddressSpace);
@@ -5118,35 +5145,21 @@ bool X86TTIImpl::supportsGather() const {
return ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2());
}
+bool X86TTIImpl::forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
+ // Gather / Scatter for vector 2 is not profitable on KNL / SKX
+ // Vector-4 of gather/scatter instruction does not exist on KNL. We can extend
+ // it to 8 elements, but zeroing upper bits of the mask vector will add more
+ // instructions. Right now we give the scalar cost of vector-4 for KNL. TODO:
+ // Check, maybe the gather/scatter instruction is better in the VariableMask
+ // case.
+ unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
+ return NumElts == 1 ||
+ (ST->hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
+}
+
bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
if (!supportsGather())
return false;
-
- // This function is called now in two cases: from the Loop Vectorizer
- // and from the Scalarizer.
- // When the Loop Vectorizer asks about legality of the feature,
- // the vectorization factor is not calculated yet. The Loop Vectorizer
- // sends a scalar type and the decision is based on the width of the
- // scalar element.
- // Later on, the cost model will estimate usage this intrinsic based on
- // the vector type.
- // The Scalarizer asks again about legality. It sends a vector type.
- // In this case we can reject non-power-of-2 vectors.
- // We also reject single element vectors as the type legalizer can't
- // scalarize it.
- if (auto *DataVTy = dyn_cast<FixedVectorType>(DataTy)) {
- unsigned NumElts = DataVTy->getNumElements();
- if (NumElts == 1)
- return false;
- // Gather / Scatter for vector 2 is not profitable on KNL / SKX
- // Vector-4 of gather/scatter instruction does not exist on KNL.
- // We can extend it to 8 elements, but zeroing upper bits of
- // the mask vector will add more instructions. Right now we give the scalar
- // cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter
- // instruction is better in the VariableMask case.
- if (ST->hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())))
- return false;
- }
Type *ScalarTy = DataTy->getScalarType();
if (ScalarTy->isPointerTy())
return true;
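
Splitting the element-count heuristic out of isLegalMaskedGather into the new forceScalarizeMaskedGather hook keeps legality purely about element types, while the cost code above (and any other client) separately scalarizes the unprofitable 2-element case, and the 4-element case on AVX-512 without VLX. A sketch of the combined decision, assuming the matching TargetTransformInfo wrappers for these X86 overrides:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/Support/Alignment.h"

    using namespace llvm;

    // Sketch only: emit the hardware gather when it is legal for the element
    // type and not forced to scalarize for this vector width.
    bool shouldEmitHardwareGather(const TargetTransformInfo &TTI,
                                  VectorType *VTy, Align Alignment) {
      return TTI.isLegalMaskedGather(VTy, Alignment) &&
             !TTI.forceScalarizeMaskedGather(VTy, Alignment);
    }
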
@@ -5187,9 +5200,48 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
+ // Check whether features are the same (apart from the ignore list).
FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
- return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
+ if (RealCallerBits == RealCalleeBits)
+ return true;
+
+ // If the features are a subset, we need to additionally check for calls
+ // that may become ABI-incompatible as a result of inlining.
+ if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
+ return false;
+
+ for (const Instruction &I : instructions(Callee)) {
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ SmallVector<Type *, 8> Types;
+ for (Value *Arg : CB->args())
+ Types.push_back(Arg->getType());
+ if (!CB->getType()->isVoidTy())
+ Types.push_back(CB->getType());
+
+ // Simple types are always ABI compatible.
+ auto IsSimpleTy = [](Type *Ty) {
+ return !Ty->isVectorTy() && !Ty->isAggregateType();
+ };
+ if (all_of(Types, IsSimpleTy))
+ continue;
+
+ if (Function *NestedCallee = CB->getCalledFunction()) {
+ // Assume that intrinsics are always ABI compatible.
+ if (NestedCallee->isIntrinsic())
+ continue;
+
+ // Do a precise compatibility check.
+ if (!areTypesABICompatible(Caller, NestedCallee, Types))
+ return false;
+ } else {
+ // We don't know the target features of the callee,
+ // assume it is incompatible.
+ return false;
+ }
+ }
+ }
+ return true;
}
bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
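
When the callee's feature set is a strict subset of the caller's, inlining re-lowers the callee's body with the caller's richer features, which can change how a nested call passes vector or aggregate arguments; the new walk refuses to inline in that case unless the types are provably ABI compatible or the nested callee is an intrinsic. The hazard in source form, as a hypothetical C++ example using GCC/Clang vector extensions (not code from this patch):

    typedef float v8sf __attribute__((vector_size(32))); // 256-bit vector

    v8sf consume(v8sf v);        // built without AVX: v is passed in memory

    void wrapper(v8sf *p) {      // also built without AVX
      (void)consume(*p);         // compilers warn (-Wpsabi) about this ABI edge
    }

    __attribute__((target("avx2")))
    void caller(v8sf *p) {
      wrapper(p);                // inlining wrapper here would re-emit the call
                                 // to consume with AVX enabled and pass v in a
                                 // YMM register, mismatching consume's definition
    }

Calls whose signatures involve only scalar, non-aggregate types, and calls to intrinsics, are still treated as compatible, matching the IsSimpleTy and isIntrinsic checks above.
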
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 11e9cb09c7d5..69715072426f 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -226,6 +226,10 @@ public:
bool isLegalMaskedStore(Type *DataType, Align Alignment);
bool isLegalNTLoad(Type *DataType, Align Alignment);
bool isLegalNTStore(Type *DataType, Align Alignment);
+ bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
+ bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
+ return forceScalarizeMaskedGather(VTy, Alignment);
+ }
bool isLegalMaskedGather(Type *DataType, Align Alignment);
bool isLegalMaskedScatter(Type *DataType, Align Alignment);
bool isLegalMaskedExpandLoad(Type *DataType);
diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index f2f89f4269ed..19ebcb3ea3e8 100644
--- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -428,7 +428,7 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(
DL = MI->getDebugLoc();
for (const CalleeSavedInfo &I : CSI) {
- unsigned Reg = I.getReg();
+ Register Reg = I.getReg();
assert(Reg != XCore::LR && !(Reg == XCore::R10 && hasFP(*MF)) &&
"LR & FP are always handled in emitPrologue");
@@ -455,7 +455,7 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters(
if (!AtStart)
--BeforeI;
for (const CalleeSavedInfo &CSR : CSI) {
- unsigned Reg = CSR.getReg();
+ Register Reg = CSR.getReg();
assert(Reg != XCore::LR && !(Reg == XCore::R10 && hasFP(*MF)) &&
"LR & FP are always handled in emitEpilogue");
diff --git a/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 6799823f6fcb..0d1ba39b8b10 100644
--- a/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -97,7 +97,7 @@ static void InsertFPConstInst(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
- unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
+ Register ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
@@ -174,7 +174,7 @@ static void InsertSPConstInst(MachineBasicBlock::iterator II,
} else
ScratchBase = Reg;
BuildMI(MBB, II, dl, TII.get(XCore::LDAWSP_ru6), ScratchBase).addImm(0);
- unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
+ Register ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
diff --git a/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/llvm/lib/Target/XCore/XCoreSubtarget.cpp
index 1be707cb488c..d4b777ef447f 100644
--- a/llvm/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -26,5 +26,5 @@ void XCoreSubtarget::anchor() { }
XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(),
- FrameLowering(*this), TLInfo(TM, *this), TSInfo() {}
+ : XCoreGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), FrameLowering(*this),
+ TLInfo(TM, *this) {}